author    | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400
committer | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400
commit    | 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree      | 976dc5ab7fddf506dadce60ae936f43f58787092 /gcc/config/pa/pa.c
download  | cbb-gcc-4.6.4-15d2061ac0796199866debe9ac87130894b0cdd3.tar.bz2
          | cbb-gcc-4.6.4-15d2061ac0796199866debe9ac87130894b0cdd3.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website;
verified gcc-4.6.4.tar.bz2.sig;
imported gcc-4.6.4 source tree from verified upstream tarball.
downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.
if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
Diffstat (limited to 'gcc/config/pa/pa.c')
-rw-r--r-- | gcc/config/pa/pa.c | 10471
1 file changed, 10471 insertions, 0 deletions
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c new file mode 100644 index 000000000..8a4445fdc --- /dev/null +++ b/gcc/config/pa/pa.c @@ -0,0 +1,10471 @@ +/* Subroutines for insn-output.c for HPPA. + Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "output.h" +#include "except.h" +#include "expr.h" +#include "optabs.h" +#include "reload.h" +#include "integrate.h" +#include "function.h" +#include "diagnostic-core.h" +#include "ggc.h" +#include "recog.h" +#include "predict.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" +#include "df.h" + +/* Return nonzero if there is a bypass for the output of + OUT_INSN and the fp store IN_INSN. */ +int +hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn) +{ + enum machine_mode store_mode; + enum machine_mode other_mode; + rtx set; + + if (recog_memoized (in_insn) < 0 + || (get_attr_type (in_insn) != TYPE_FPSTORE + && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD) + || recog_memoized (out_insn) < 0) + return 0; + + store_mode = GET_MODE (SET_SRC (PATTERN (in_insn))); + + set = single_set (out_insn); + if (!set) + return 0; + + other_mode = GET_MODE (SET_SRC (set)); + + return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode)); +} + + +#ifndef DO_FRAME_NOTES +#ifdef INCOMING_RETURN_ADDR_RTX +#define DO_FRAME_NOTES 1 +#else +#define DO_FRAME_NOTES 0 +#endif +#endif + +static void pa_option_override (void); +static void copy_reg_pointer (rtx, rtx); +static void fix_range (const char *); +static bool pa_handle_option (size_t, const char *, int); +static int hppa_register_move_cost (enum machine_mode mode, reg_class_t, + reg_class_t); +static int hppa_address_cost (rtx, bool); +static bool hppa_rtx_costs (rtx, int, int, int *, bool); +static inline rtx force_mode (enum machine_mode, rtx); +static void pa_reorg (void); +static void pa_combine_instructions (void); +static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx); +static bool forward_branch_p (rtx); +static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *); +static int compute_movmem_length (rtx); +static int compute_clrmem_length (rtx); +static bool pa_assemble_integer (rtx, unsigned int, int); +static void remove_useless_addtr_insns (int); +static void store_reg (int, HOST_WIDE_INT, int); +static void store_reg_modify (int, int, HOST_WIDE_INT); +static void load_reg (int, HOST_WIDE_INT, int); +static void set_reg_plus_d (int, int, HOST_WIDE_INT, int); 
+static rtx pa_function_value (const_tree, const_tree, bool); +static rtx pa_libcall_value (enum machine_mode, const_rtx); +static bool pa_function_value_regno_p (const unsigned int); +static void pa_output_function_prologue (FILE *, HOST_WIDE_INT); +static void update_total_code_bytes (unsigned int); +static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT); +static int pa_adjust_cost (rtx, rtx, rtx, int); +static int pa_adjust_priority (rtx, int); +static int pa_issue_rate (void); +static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED; +static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT) + ATTRIBUTE_UNUSED; +static void pa_encode_section_info (tree, rtx, int); +static const char *pa_strip_name_encoding (const char *); +static bool pa_function_ok_for_sibcall (tree, tree); +static void pa_globalize_label (FILE *, const char *) + ATTRIBUTE_UNUSED; +static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree); +#if !defined(USE_COLLECT2) +static void pa_asm_out_constructor (rtx, int); +static void pa_asm_out_destructor (rtx, int); +#endif +static void pa_init_builtins (void); +static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int); +static rtx hppa_builtin_saveregs (void); +static void hppa_va_start (tree, rtx); +static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); +static bool pa_scalar_mode_supported_p (enum machine_mode); +static bool pa_commutative_p (const_rtx x, int outer_code); +static void copy_fp_args (rtx) ATTRIBUTE_UNUSED; +static int length_fp_args (rtx) ATTRIBUTE_UNUSED; +static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode); +static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED; +static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED; +static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED; +static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED; +static void pa_elf_file_start (void) ATTRIBUTE_UNUSED; +static void pa_som_file_start (void) ATTRIBUTE_UNUSED; +static void pa_linux_file_start (void) ATTRIBUTE_UNUSED; +static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED; +static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED; +static void output_deferred_plabels (void); +static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED; +#ifdef ASM_OUTPUT_EXTERNAL_REAL +static void pa_hpux_file_end (void); +#endif +#if HPUX_LONG_DOUBLE_LIBRARY +static void pa_hpux_init_libfuncs (void); +#endif +static rtx pa_struct_value_rtx (tree, int); +static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); +static void pa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx pa_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree); +static struct machine_function * pa_init_machine_status (void); +static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t, + enum machine_mode, + secondary_reload_info *); +static void pa_extra_live_on_entry (bitmap); +static enum machine_mode pa_promote_function_mode (const_tree, + enum machine_mode, int *, + const_tree, int); + +static void pa_asm_trampoline_template (FILE *); +static void pa_trampoline_init (rtx, tree, rtx); +static rtx pa_trampoline_adjust_address (rtx); +static rtx pa_delegitimize_address (rtx); +static bool 
pa_print_operand_punct_valid_p (unsigned char); +static rtx pa_internal_arg_pointer (void); +static bool pa_can_eliminate (const int, const int); +static void pa_conditional_register_usage (void); +static enum machine_mode pa_c_mode_for_suffix (char); +static section *pa_function_section (tree, enum node_frequency, bool, bool); +static unsigned int pa_section_type_flags (tree, const char *, int); + +/* The following extra sections are only used for SOM. */ +static GTY(()) section *som_readonly_data_section; +static GTY(()) section *som_one_only_readonly_data_section; +static GTY(()) section *som_one_only_data_section; + +/* Which cpu we are scheduling for. */ +enum processor_type pa_cpu = TARGET_SCHED_DEFAULT; + +/* The UNIX standard to use for predefines and linking. */ +int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993; + +/* Counts for the number of callee-saved general and floating point + registers which were saved by the current function's prologue. */ +static int gr_saved, fr_saved; + +/* Boolean indicating whether the return pointer was saved by the + current function's prologue. */ +static bool rp_saved; + +static rtx find_addr_reg (rtx); + +/* Keep track of the number of bytes we have output in the CODE subspace + during this compilation so we'll know when to emit inline long-calls. */ +unsigned long total_code_bytes; + +/* The last address of the previous function plus the number of bytes in + associated thunks that have been output. This is used to determine if + a thunk can use an IA-relative branch to reach its target function. */ +static unsigned int last_address; + +/* Variables to handle plabels that we discover are necessary at assembly + output time. They are output after the current function. */ +struct GTY(()) deferred_plabel +{ + rtx internal_label; + rtx symbol; +}; +static GTY((length ("n_deferred_plabels"))) struct deferred_plabel * + deferred_plabels; +static size_t n_deferred_plabels = 0; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options pa_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + + +/* Initialize the GCC target structure. 
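+ Each #undef/#define pair below replaces the default for one target hook with the PA-specific implementation declared above.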
*/ + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE pa_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE pa_option_optimization_table + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER pa_assemble_integer + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE pa_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE pa_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST pa_adjust_cost +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE pa_issue_rate + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall + +#undef TARGET_COMMUTATIVE_P +#define TARGET_COMMUTATIVE_P pa_commutative_p + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_ASM_FILE_END +#ifdef ASM_OUTPUT_EXTERNAL_REAL +#define TARGET_ASM_FILE_END pa_hpux_file_end +#else +#define TARGET_ASM_FILE_END output_deferred_plabels +#endif + +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p + +#if !defined(USE_COLLECT2) +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor +#endif + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT) +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION pa_handle_option + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS pa_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN pa_expand_builtin + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS hppa_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hppa_address_cost + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg + +#if HPUX_LONG_DOUBLE_LIBRARY +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs +#endif + +#undef TARGET_PROMOTE_FUNCTION_MODE 
+#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY pa_return_in_memory +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG pa_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary + +#undef TARGET_EXPAND_BUILTIN_SAVEREGS +#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD pa_secondary_reload + +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT pa_trampoline_init +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address +#undef TARGET_INTERNAL_ARG_POINTER +#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE pa_can_eliminate +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage +#undef TARGET_C_MODE_FOR_SUFFIX +#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix +#undef TARGET_ASM_FUNCTION_SECTION +#define TARGET_ASM_FUNCTION_SECTION pa_function_section + +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Parse the -mfixed-range= option string. */ + +static void +fix_range (const char *const_str) +{ + int i, first, last; + char *str, *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. 
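+ Registers marked here are also removed from the call-used set, and if every FP register ends up fixed, FP register use is disabled altogether (see the end of this function).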
*/ + + i = strlen (const_str); + str = (char *) alloca (i + 1); + memcpy (str, const_str, i + 1); + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning (0, "value of -mfixed-range must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning (0, "unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning (0, "unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning (0, "%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = call_used_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } + + /* Check if all floating point registers have been fixed. */ + for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) + if (!fixed_regs[i]) + break; + + if (i > FP_REG_LAST) + target_flags |= MASK_DISABLE_FPREGS; +} + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + case OPT_mnosnake: + case OPT_mpa_risc_1_0: + case OPT_march_1_0: + target_flags &= ~(MASK_PA_11 | MASK_PA_20); + return true; + + case OPT_msnake: + case OPT_mpa_risc_1_1: + case OPT_march_1_1: + target_flags &= ~MASK_PA_20; + target_flags |= MASK_PA_11; + return true; + + case OPT_mpa_risc_2_0: + case OPT_march_2_0: + target_flags |= MASK_PA_11 | MASK_PA_20; + return true; + + case OPT_mschedule_: + if (strcmp (arg, "8000") == 0) + pa_cpu = PROCESSOR_8000; + else if (strcmp (arg, "7100") == 0) + pa_cpu = PROCESSOR_7100; + else if (strcmp (arg, "700") == 0) + pa_cpu = PROCESSOR_700; + else if (strcmp (arg, "7100LC") == 0) + pa_cpu = PROCESSOR_7100LC; + else if (strcmp (arg, "7200") == 0) + pa_cpu = PROCESSOR_7200; + else if (strcmp (arg, "7300") == 0) + pa_cpu = PROCESSOR_7300; + else + return false; + return true; + + case OPT_mfixed_range_: + fix_range (arg); + return true; + +#if TARGET_HPUX + case OPT_munix_93: + flag_pa_unix = 1993; + return true; +#endif + +#if TARGET_HPUX_10_10 + case OPT_munix_95: + flag_pa_unix = 1995; + return true; +#endif + +#if TARGET_HPUX_11_11 + case OPT_munix_98: + flag_pa_unix = 1998; + return true; +#endif + + default: + return true; + } +} + +/* Implement the TARGET_OPTION_OVERRIDE hook. */ + +static void +pa_option_override (void) +{ + /* Unconditional branches in the delay slot are not compatible with dwarf2 + call frame information. There is no benefit in using this optimization + on PA8000 and later processors. */ + if (pa_cpu >= PROCESSOR_8000 + || (targetm.except_unwind_info (&global_options) == UI_DWARF2 + && flag_exceptions) + || flag_unwind_tables) + target_flags &= ~MASK_JUMP_IN_DELAY; + + if (flag_pic && TARGET_PORTABLE_RUNTIME) + { + warning (0, "PIC code generation is not supported in the portable runtime model"); + } + + if (flag_pic && TARGET_FAST_INDIRECT_CALLS) + { + warning (0, "PIC code generation is not compatible with fast indirect calls"); + } + + if (! TARGET_GAS && write_symbols != NO_DEBUG) + { + warning (0, "-g is only supported when using GAS on this processor,"); + warning (0, "-g option disabled"); + write_symbols = NO_DEBUG; + } + + /* We only support the "big PIC" model now. And we always generate PIC + code when in 64bit mode. 
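+ A flag_pic value of 2 corresponds to -fPIC, the big model, so -fpic requests are promoted here.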
*/ + if (flag_pic == 1 || TARGET_64BIT) + flag_pic = 2; + + /* Disable -freorder-blocks-and-partition as we don't support hot and + cold partitioning. */ + if (flag_reorder_blocks_and_partition) + { + inform (input_location, + "-freorder-blocks-and-partition does not work " + "on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + /* We can't guarantee that .dword is available for 32-bit targets. */ + if (UNITS_PER_WORD == 4) + targetm.asm_out.aligned_op.di = NULL; + + /* The unaligned ops are only available when using GAS. */ + if (!TARGET_GAS) + { + targetm.asm_out.unaligned_op.hi = NULL; + targetm.asm_out.unaligned_op.si = NULL; + targetm.asm_out.unaligned_op.di = NULL; + } + + init_machine_status = pa_init_machine_status; +} + +enum pa_builtins +{ + PA_BUILTIN_COPYSIGNQ, + PA_BUILTIN_FABSQ, + PA_BUILTIN_INFQ, + PA_BUILTIN_HUGE_VALQ, + PA_BUILTIN_max +}; + +static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max]; + +static void +pa_init_builtins (void) +{ +#ifdef DONT_HAVE_FPUTC_UNLOCKED + built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = + built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED]; + implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] + = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED]; +#endif +#if TARGET_HPUX_11 + if (built_in_decls [BUILT_IN_FINITE]) + set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite"); + if (built_in_decls [BUILT_IN_FINITEF]) + set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef"); +#endif + + if (HPUX_LONG_DOUBLE_LIBRARY) + { + tree decl, ftype; + + /* Under HPUX, the __float128 type is a synonym for "long double". */ + (*lang_hooks.types.register_builtin_type) (long_double_type_node, + "__float128"); + + /* TFmode support builtins. 
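+ fabsq and copysignq expand to calls to the _U_Qfabs and _U_Qfcopysign library routines, while infq and huge_valq are expanded inline as constants (see pa_expand_builtin below).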
*/ + ftype = build_function_type_list (long_double_type_node, + long_double_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_fabsq", ftype, + PA_BUILTIN_FABSQ, BUILT_IN_MD, + "_U_Qfabs", NULL_TREE); + TREE_READONLY (decl) = 1; + pa_builtins[PA_BUILTIN_FABSQ] = decl; + + ftype = build_function_type_list (long_double_type_node, + long_double_type_node, + long_double_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_copysignq", ftype, + PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD, + "_U_Qfcopysign", NULL_TREE); + TREE_READONLY (decl) = 1; + pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl; + + ftype = build_function_type (long_double_type_node, void_list_node); + decl = add_builtin_function ("__builtin_infq", ftype, + PA_BUILTIN_INFQ, BUILT_IN_MD, + NULL, NULL_TREE); + pa_builtins[PA_BUILTIN_INFQ] = decl; + + decl = add_builtin_function ("__builtin_huge_valq", ftype, + PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD, + NULL, NULL_TREE); + pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl; + } +} + +static rtx +pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + switch (fcode) + { + case PA_BUILTIN_FABSQ: + case PA_BUILTIN_COPYSIGNQ: + return expand_call (exp, target, ignore); + + case PA_BUILTIN_INFQ: + case PA_BUILTIN_HUGE_VALQ: + { + enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); + REAL_VALUE_TYPE inf; + rtx tmp; + + real_inf (&inf); + tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode); + + tmp = validize_mem (force_const_mem (target_mode, tmp)); + + if (target == 0) + target = gen_reg_rtx (target_mode); + + emit_move_insn (target, tmp); + return target; + } + + default: + gcc_unreachable (); + } + + return NULL_RTX; +} + +/* Function to init struct machine_function. + This will be called, via a pointer variable, + from push_function_context. */ + +static struct machine_function * +pa_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* If FROM is a probable pointer register, mark TO as a probable + pointer register with the same pointer alignment as FROM. */ + +static void +copy_reg_pointer (rtx to, rtx from) +{ + if (REG_POINTER (from)) + mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from))); +} + +/* Return 1 if X contains a symbolic expression. We know these + expressions will have one of a few well defined forms, so + we need only check those forms. */ +int +symbolic_expression_p (rtx x) +{ + + /* Strip off any HIGH. */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + return (symbolic_operand (x, VOIDmode)); +} + +/* Accept any constant that can be moved in one instruction into a + general register. */ +int +cint_ok_for_move (HOST_WIDE_INT ival) +{ + /* OK if ldo, ldil, or zdepi, can be used. */ + return (VAL_14_BITS_P (ival) + || ldil_cint_p (ival) + || zdepi_cint_p (ival)); +} + +/* Return truth value of whether OP can be used as an operand in a + adddi3 insn. */ +int +adddi3_operand (rtx op, enum machine_mode mode) +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT + && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op)))); +} + +/* True iff the operand OP can be used as the destination operand of + an integer store. This also implies the operand could be used as + the source operand of an integer load. Symbolic, lo_sum and indexed + memory operands are not allowed. 
We accept reloading pseudos and + other memory operands. */ +int +integer_store_memory_operand (rtx op, enum machine_mode mode) +{ + return ((reload_in_progress + && REG_P (op) + && REGNO (op) >= FIRST_PSEUDO_REGISTER + && reg_renumber [REGNO (op)] < 0) + || (GET_CODE (op) == MEM + && (reload_in_progress || memory_address_p (mode, XEXP (op, 0))) + && !symbolic_memory_operand (op, VOIDmode) + && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0)) + && !IS_INDEX_ADDR_P (XEXP (op, 0)))); +} + +/* True iff ldil can be used to load this CONST_INT. The least + significant 11 bits of the value must be zero and the value must + not change sign when extended from 32 to 64 bits. */ +int +ldil_cint_p (HOST_WIDE_INT ival) +{ + HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff); + + return x == 0 || x == ((HOST_WIDE_INT) -1 << 31); +} + +/* True iff zdepi can be used to generate this CONST_INT. + zdepi first sign extends a 5-bit signed number to a given field + length, then places this field anywhere in a zero. */ +int +zdepi_cint_p (unsigned HOST_WIDE_INT x) +{ + unsigned HOST_WIDE_INT lsb_mask, t; + + /* This might not be obvious, but it's at least fast. + This function is critical; we don't have the time loops would take. */ + lsb_mask = x & -x; + t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1); + /* Return true iff t is a power of two. */ + return ((t & (t - 1)) == 0); +} + +/* True iff depi or extru can be used to compute (reg & mask). + Accept bit pattern like these: + 0....01....1 + 1....10....0 + 1..10..01..1 */ +int +and_mask_p (unsigned HOST_WIDE_INT mask) +{ + mask = ~mask; + mask += mask & -mask; + return (mask & (mask - 1)) == 0; +} + +/* True iff depi can be used to compute (reg | MASK). */ +int +ior_mask_p (unsigned HOST_WIDE_INT mask) +{ + mask += mask & -mask; + return (mask & (mask - 1)) == 0; +} + +/* Legitimize PIC addresses. If the address is already + position-independent, we return ORIG. Newly generated + position-independent addresses go to REG. If we need more + than one register, we lose. */ + +rtx +legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + rtx pic_ref = orig; + + gcc_assert (!PA_SYMBOL_REF_TLS_P (orig)); + + /* Labels need special handling. */ + if (pic_label_operand (orig, mode)) + { + rtx insn; + + /* We do not want to go through the movXX expanders here since that + would create recursion. + + Nor do we really want to call a generator for a named pattern + since that requires multiple patterns if we want to support + multiple word sizes. + + So instead we just emit the raw set, which avoids the movXX + expanders completely. */ + mark_reg_pointer (reg, BITS_PER_UNIT); + insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig)); + + /* Put a REG_EQUAL note on this insn, so that it can be optimized. */ + add_reg_note (insn, REG_EQUAL, orig); + + /* During and after reload, we need to generate a REG_LABEL_OPERAND note + and update LABEL_NUSES because this is not done automatically. */ + if (reload_in_progress || reload_completed) + { + /* Extract LABEL_REF. */ + if (GET_CODE (orig) == CONST) + orig = XEXP (XEXP (orig, 0), 0); + /* Extract CODE_LABEL. */ + orig = XEXP (orig, 0); + add_reg_note (insn, REG_LABEL_OPERAND, orig); + LABEL_NUSES (orig)++; + } + crtl->uses_pic_offset_table = 1; + return reg; + } + if (GET_CODE (orig) == SYMBOL_REF) + { + rtx insn, tmp_reg; + + gcc_assert (reg); + + /* Before reload, allocate a temporary register for the intermediate + result. 
This allows the sequence to be deleted when the final + result is unused and the insns are trivially dead. */ + tmp_reg = ((reload_in_progress || reload_completed) + ? reg : gen_reg_rtx (Pmode)); + + if (function_label_operand (orig, mode)) + { + /* Force function label into memory in word mode. */ + orig = XEXP (force_const_mem (word_mode, orig), 0); + /* Load plabel address from DLT. */ + emit_move_insn (tmp_reg, + gen_rtx_PLUS (word_mode, pic_offset_table_rtx, + gen_rtx_HIGH (word_mode, orig))); + pic_ref + = gen_const_mem (Pmode, + gen_rtx_LO_SUM (Pmode, tmp_reg, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, orig), + UNSPEC_DLTIND14R))); + emit_move_insn (reg, pic_ref); + /* Now load address of function descriptor. */ + pic_ref = gen_rtx_MEM (Pmode, reg); + } + else + { + /* Load symbol reference from DLT. */ + emit_move_insn (tmp_reg, + gen_rtx_PLUS (word_mode, pic_offset_table_rtx, + gen_rtx_HIGH (word_mode, orig))); + pic_ref + = gen_const_mem (Pmode, + gen_rtx_LO_SUM (Pmode, tmp_reg, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, orig), + UNSPEC_DLTIND14R))); + } + + crtl->uses_pic_offset_table = 1; + mark_reg_pointer (reg, BITS_PER_UNIT); + insn = emit_move_insn (reg, pic_ref); + + /* Put a REG_EQUAL note on this insn, so that it can be optimized. */ + set_unique_reg_note (insn, REG_EQUAL, orig); + + return reg; + } + else if (GET_CODE (orig) == CONST) + { + rtx base; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) + return orig; + + gcc_assert (reg); + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + + if (GET_CODE (orig) == CONST_INT) + { + if (INT_14_BITS (orig)) + return plus_constant (base, INTVAL (orig)); + orig = force_reg (Pmode, orig); + } + pic_ref = gen_rtx_PLUS (Pmode, base, orig); + /* Likewise, should we set special REG_NOTEs here? 
*/ + } + + return pic_ref; +} + +static GTY(()) rtx gen_tls_tga; + +static rtx +gen_tls_get_addr (void) +{ + if (!gen_tls_tga) + gen_tls_tga = init_one_libfunc ("__tls_get_addr"); + return gen_tls_tga; +} + +static rtx +hppa_tls_call (rtx arg) +{ + rtx ret; + + ret = gen_reg_rtx (Pmode); + emit_library_call_value (gen_tls_get_addr (), ret, + LCT_CONST, Pmode, 1, arg, Pmode); + + return ret; +} + +static rtx +legitimize_tls_address (rtx addr) +{ + rtx ret, insn, tmp, t1, t2, tp; + enum tls_model model = SYMBOL_REF_TLS_MODEL (addr); + + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + tmp = gen_reg_rtx (Pmode); + if (flag_pic) + emit_insn (gen_tgd_load_pic (tmp, addr)); + else + emit_insn (gen_tgd_load (tmp, addr)); + ret = hppa_tls_call (tmp); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + ret = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (Pmode); + start_sequence (); + if (flag_pic) + emit_insn (gen_tld_load_pic (tmp, addr)); + else + emit_insn (gen_tld_load (tmp, addr)); + t1 = hppa_tls_call (tmp); + insn = get_insns (); + end_sequence (); + t2 = gen_reg_rtx (Pmode); + emit_libcall_block (insn, t2, t1, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLDBASE)); + emit_insn (gen_tld_offset_load (ret, addr, t2)); + break; + + case TLS_MODEL_INITIAL_EXEC: + tp = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (Pmode); + ret = gen_reg_rtx (Pmode); + emit_insn (gen_tp_load (tp)); + if (flag_pic) + emit_insn (gen_tie_load_pic (tmp, addr)); + else + emit_insn (gen_tie_load (tmp, addr)); + emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp)); + break; + + case TLS_MODEL_LOCAL_EXEC: + tp = gen_reg_rtx (Pmode); + ret = gen_reg_rtx (Pmode); + emit_insn (gen_tp_load (tp)); + emit_insn (gen_tle_load (ret, addr, tp)); + break; + + default: + gcc_unreachable (); + } + + return ret; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. + + For the PA, transform: + + memory(X + <large int>) + + into: + + if (<large int> & mask) >= 16 + Y = (<large int> & ~mask) + mask + 1 Round up. + else + Y = (<large int> & ~mask) Round down. + Z = X + Y + memory (Z + (<large int> - Y)); + + This is for CSE to find several similar references, and only use one Z. + + X can either be a SYMBOL_REF or REG, but because combine cannot + perform a 4->2 combination we do nothing for SYMBOL_REF + D where + D will not fit in 14 bits. + + MODE_FLOAT references allow displacements which fit in 5 bits, so use + 0x1f as the mask. + + MODE_INT references allow displacements which fit in 14 bits, so use + 0x3fff as the mask. + + This relies on the fact that most mode MODE_FLOAT references will use FP + registers and most mode MODE_INT references will use integer registers. + (In the rare case of an FP register used in an integer MODE, we depend + on secondary reloads to clean things up.) + + + It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special + manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed + addressing modes to be used). + + Put X and Z into registers. Then put the entire expression into + a register. 
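+ For example, a MODE_INT reference to memory (X + 0x4030) uses mask 0x3fff; since 0x4030 & 0x3fff = 0x30 is below the halfway point 0x2000, Y is rounded down to 0x4000, Z = X + 0x4000, and the reference becomes memory (Z + 0x30).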
*/ + +rtx +hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx orig = x; + + /* We need to canonicalize the order of operands in unscaled indexed + addresses since the code that checks if an address is valid doesn't + always try both orders. */ + if (!TARGET_NO_SPACE_REGS + && GET_CODE (x) == PLUS + && GET_MODE (x) == Pmode + && REG_P (XEXP (x, 0)) + && REG_P (XEXP (x, 1)) + && REG_POINTER (XEXP (x, 0)) + && !REG_POINTER (XEXP (x, 1))) + return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0)); + + if (PA_SYMBOL_REF_TLS_P (x)) + return legitimize_tls_address (x); + else if (flag_pic) + return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode)); + + /* Strip off CONST. */ + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + + /* Special case. Get the SYMBOL_REF into a register and use indexing. + That should always be safe. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == SYMBOL_REF) + { + rtx reg = force_reg (Pmode, XEXP (x, 1)); + return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0))); + } + + /* Note we must reject symbols which represent function addresses + since the assembler/linker can't handle arithmetic on plabels. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == CONST_INT + && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0))) + || GET_CODE (XEXP (x, 0)) == REG)) + { + rtx int_part, ptr_reg; + int newoffset; + int offset = INTVAL (XEXP (x, 1)); + int mask; + + mask = (GET_MODE_CLASS (mode) == MODE_FLOAT + ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff); + + /* Choose which way to round the offset. Round up if we + are >= halfway to the next boundary. */ + if ((offset & mask) >= ((mask + 1) / 2)) + newoffset = (offset & ~ mask) + mask + 1; + else + newoffset = (offset & ~ mask); + + /* If the newoffset will not fit in 14 bits (ldo), then + handling this would take 4 or 5 instructions (2 to load + the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to + add the new offset and the SYMBOL_REF.) Combine can + not handle 4->2 or 5->2 combinations, so do not create + them. */ + if (! VAL_14_BITS_P (newoffset) + && GET_CODE (XEXP (x, 0)) == SYMBOL_REF) + { + rtx const_part = plus_constant (XEXP (x, 0), newoffset); + rtx tmp_reg + = force_reg (Pmode, + gen_rtx_HIGH (Pmode, const_part)); + ptr_reg + = force_reg (Pmode, + gen_rtx_LO_SUM (Pmode, + tmp_reg, const_part)); + } + else + { + if (! VAL_14_BITS_P (newoffset)) + int_part = force_reg (Pmode, GEN_INT (newoffset)); + else + int_part = GEN_INT (newoffset); + + ptr_reg = force_reg (Pmode, + gen_rtx_PLUS (Pmode, + force_reg (Pmode, XEXP (x, 0)), + int_part)); + } + return plus_constant (ptr_reg, offset - newoffset); + } + + /* Handle (plus (mult (a) (shadd_constant)) (b)). 
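+ A shadd_constant is 2, 4 or 8, so such a sum maps onto a single shNadd instruction.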
*/ + + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))) + && (OBJECT_P (XEXP (x, 1)) + || GET_CODE (XEXP (x, 1)) == SUBREG) + && GET_CODE (XEXP (x, 1)) != CONST) + { + int val = INTVAL (XEXP (XEXP (x, 0), 1)); + rtx reg1, reg2; + + reg1 = XEXP (x, 1); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + reg2, + GEN_INT (val)), + reg1)); + } + + /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)). + + Only do so for floating point modes since this is more speculative + and we lose if it's an integer store. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1))) + && (mode == SFmode || mode == DFmode)) + { + + /* First, try and figure out what to use as a base register. */ + rtx reg1, reg2, base, idx; + + reg1 = XEXP (XEXP (x, 0), 1); + reg2 = XEXP (x, 1); + base = NULL_RTX; + idx = NULL_RTX; + + /* Make sure they're both regs. If one was a SYMBOL_REF [+ const], + then emit_move_sequence will turn on REG_POINTER so we'll know + it's a base register below. */ + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + /* Figure out what the base and index are. */ + + if (GET_CODE (reg1) == REG + && REG_POINTER (reg1)) + { + base = reg1; + idx = gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + XEXP (XEXP (XEXP (x, 0), 0), 0), + XEXP (XEXP (XEXP (x, 0), 0), 1)), + XEXP (x, 1)); + } + else if (GET_CODE (reg2) == REG + && REG_POINTER (reg2)) + { + base = reg2; + idx = XEXP (x, 0); + } + + if (base == 0) + return orig; + + /* If the index adds a large constant, try to scale the + constant so that it can be loaded with only one insn. */ + if (GET_CODE (XEXP (idx, 1)) == CONST_INT + && VAL_14_BITS_P (INTVAL (XEXP (idx, 1)) + / INTVAL (XEXP (XEXP (idx, 0), 1))) + && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0) + { + /* Divide the CONST_INT by the scale factor, then add it to A. */ + int val = INTVAL (XEXP (idx, 1)); + + val /= INTVAL (XEXP (XEXP (idx, 0), 1)); + reg1 = XEXP (XEXP (idx, 0), 0); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val))); + + /* We can now generate a simple scaled indexed address. */ + return + force_reg + (Pmode, gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg1, + XEXP (XEXP (idx, 0), 1)), + base)); + } + + /* If B + C is still a valid base register, then add them. 
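+ (C is accepted only in the range [-4096, 4096], keeping the new base within a small distance of the old one.)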
*/ + if (GET_CODE (XEXP (idx, 1)) == CONST_INT + && INTVAL (XEXP (idx, 1)) <= 4096 + && INTVAL (XEXP (idx, 1)) >= -4096) + { + int val = INTVAL (XEXP (XEXP (idx, 0), 1)); + rtx reg1, reg2; + + reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1))); + + reg2 = XEXP (XEXP (idx, 0), 0); + if (GET_CODE (reg2) != CONST_INT) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + reg2, + GEN_INT (val)), + reg1)); + } + + /* Get the index into a register, then add the base + index and + return a register holding the result. */ + + /* First get A into a register. */ + reg1 = XEXP (XEXP (idx, 0), 0); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + /* And get B into a register. */ + reg2 = XEXP (idx, 1); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + reg1 = force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg1, + XEXP (XEXP (idx, 0), 1)), + reg2)); + + /* Add the result to our base register and return. */ + return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1)); + + } + + /* Uh-oh. We might have an address for x[n-100000]. This needs + special handling to avoid creating an indexed memory address + with x-100000 as the base. + + If the constant part is small enough, then it's still safe because + there is a guard page at the beginning and end of the data segment. + + Scaled references are common enough that we want to try and rearrange the + terms so that we can use indexing for these addresses too. Only + do the optimization for floatint point modes. */ + + if (GET_CODE (x) == PLUS + && symbolic_expression_p (XEXP (x, 1))) + { + /* Ugly. We modify things here so that the address offset specified + by the index expression is computed first, then added to x to form + the entire address. */ + + rtx regx1, regx2, regy1, regy2, y; + + /* Strip off any CONST. */ + y = XEXP (x, 1); + if (GET_CODE (y) == CONST) + y = XEXP (y, 0); + + if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS) + { + /* See if this looks like + (plus (mult (reg) (shadd_const)) + (const (plus (symbol_ref) (const_int)))) + + Where const_int is small. In that case the const + expression is a valid pointer for indexing. + + If const_int is big, but can be divided evenly by shadd_const + and added to (reg). This allows more scaled indexed addresses. 
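+ In the second case, the quotient const_int / shadd_const is folded into the register operand before scaling.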
*/ + if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF + && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) >= -4096 + && INTVAL (XEXP (y, 1)) <= 4095 + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) + { + int val = INTVAL (XEXP (XEXP (x, 0), 1)); + rtx reg1, reg2; + + reg1 = XEXP (x, 1); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + reg2, + GEN_INT (val)), + reg1)); + } + else if ((mode == DFmode || mode == SFmode) + && GET_CODE (XEXP (y, 0)) == SYMBOL_REF + && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0 + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) + { + regx1 + = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1)) + / INTVAL (XEXP (XEXP (x, 0), 1)))); + regx2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (regx2) != REG) + regx2 = force_reg (Pmode, force_operand (regx2, 0)); + regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode, + regx2, regx1)); + return + force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, regx2, + XEXP (XEXP (x, 0), 1)), + force_reg (Pmode, XEXP (y, 0)))); + } + else if (GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) >= -4096 + && INTVAL (XEXP (y, 1)) <= 4095) + { + /* This is safe because of the guard page at the + beginning and end of the data space. Just + return the original address. */ + return orig; + } + else + { + /* Doesn't look like one we can optimize. */ + regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0)); + regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0)); + regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0)); + regx1 = force_reg (Pmode, + gen_rtx_fmt_ee (GET_CODE (y), Pmode, + regx1, regy2)); + return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1)); + } + } + } + + return orig; +} + +/* Implement the TARGET_REGISTER_MOVE_COST hook. + + Compute extra cost of moving data between one register class + and another. + + Make moves from SAR so expensive they should never happen. We used to + have 0xffff here, but that generates overflow in rare cases. + + Copies involving a FP register and a non-FP register are relatively + expensive because they must go through memory. + + Other copies are reasonably cheap. */ + +static int +hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + if (from == SHIFT_REGS) + return 0x100; + else if (to == SHIFT_REGS && FP_REG_CLASS_P (from)) + return 18; + else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to)) + || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from))) + return 16; + else + return 2; +} + +/* For the HPPA, REG and REG+CONST is cost 0 + and addresses involving symbolic constants are cost 2. + + PIC addresses are very expensive. + + It is no coincidence that this has the same structure + as GO_IF_LEGITIMATE_ADDRESS. */ + +static int +hppa_address_cost (rtx X, + bool speed ATTRIBUTE_UNUSED) +{ + switch (GET_CODE (X)) + { + case REG: + case PLUS: + case LO_SUM: + return 1; + case HIGH: + return 2; + default: + return 4; + } +} + +/* Compute a (partial) cost for rtx X. 
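+ Operation costs are expressed via COSTS_N_INSNS; constants receive small relative values between 0 and 8.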
Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +hppa_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + case CONST_INT: + if (INTVAL (x) == 0) + *total = 0; + else if (INT_14_BITS (x)) + *total = 1; + else + *total = 2; + return true; + + case HIGH: + *total = 2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = 4; + return true; + + case CONST_DOUBLE: + if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode)) + && outer_code != SET) + *total = 0; + else + *total = 8; + return true; + + case MULT: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + *total = COSTS_N_INSNS (3); + else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT) + *total = COSTS_N_INSNS (8); + else + *total = COSTS_N_INSNS (20); + return true; + + case DIV: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + *total = COSTS_N_INSNS (14); + return true; + } + /* FALLTHRU */ + + case UDIV: + case MOD: + case UMOD: + *total = COSTS_N_INSNS (60); + return true; + + case PLUS: /* this includes shNadd insns */ + case MINUS: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + *total = COSTS_N_INSNS (3); + else + *total = COSTS_N_INSNS (1); + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (1); + return true; + + default: + return false; + } +} + +/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a + new rtx with the correct mode. */ +static inline rtx +force_mode (enum machine_mode mode, rtx orig) +{ + if (mode == GET_MODE (orig)) + return orig; + + gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER); + + return gen_rtx_REG (mode, REGNO (orig)); +} + +/* Return 1 if *X is a thread-local symbol. */ + +static int +pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + return PA_SYMBOL_REF_TLS_P (*x); +} + +/* Return 1 if X contains a thread-local symbol. */ + +bool +pa_tls_referenced_p (rtx x) +{ + if (!TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0); +} + +/* Emit insns to move operands[1] into operands[0]. + + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move + normally. + + Note SCRATCH_REG may not be in the proper mode depending on how it + will be used. This routine is responsible for creating a new copy + of SCRATCH_REG in the proper mode. */ + +int +emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg) +{ + register rtx operand0 = operands[0]; + register rtx operand1 = operands[1]; + register rtx tem; + + /* We can only handle indexed addresses in the destination operand + of floating point stores. Thus, we need to break out indexed + addresses from the destination operand. */ + if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0))) + { + gcc_assert (can_create_pseudo_p ()); + + tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0)); + operand0 = replace_equiv_address (operand0, tem); + } + + /* On targets with non-equivalent space registers, break out unscaled + indexed addresses from the source operand before the final CSE. + We have to do this because the REG_POINTER flag is not correctly + carried through various optimization passes and CSE may substitute + a pseudo without the pointer set for one with the pointer set. 
As + a result, we loose various opportunities to create insns with + unscaled indexed addresses. */ + if (!TARGET_NO_SPACE_REGS + && !cse_not_expected + && GET_CODE (operand1) == MEM + && GET_CODE (XEXP (operand1, 0)) == PLUS + && REG_P (XEXP (XEXP (operand1, 0), 0)) + && REG_P (XEXP (XEXP (operand1, 0), 1))) + operand1 + = replace_equiv_address (operand1, + copy_to_mode_reg (Pmode, XEXP (operand1, 0))); + + if (scratch_reg + && reload_in_progress && GET_CODE (operand0) == REG + && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) + operand0 = reg_equiv_mem[REGNO (operand0)]; + else if (scratch_reg + && reload_in_progress && GET_CODE (operand0) == SUBREG + && GET_CODE (SUBREG_REG (operand0)) == REG + && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER) + { + /* We must not alter SUBREG_BYTE (operand0) since that would confuse + the code which tracks sets/uses for delete_output_reload. */ + rtx temp = gen_rtx_SUBREG (GET_MODE (operand0), + reg_equiv_mem [REGNO (SUBREG_REG (operand0))], + SUBREG_BYTE (operand0)); + operand0 = alter_subreg (&temp); + } + + if (scratch_reg + && reload_in_progress && GET_CODE (operand1) == REG + && REGNO (operand1) >= FIRST_PSEUDO_REGISTER) + operand1 = reg_equiv_mem[REGNO (operand1)]; + else if (scratch_reg + && reload_in_progress && GET_CODE (operand1) == SUBREG + && GET_CODE (SUBREG_REG (operand1)) == REG + && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER) + { + /* We must not alter SUBREG_BYTE (operand0) since that would confuse + the code which tracks sets/uses for delete_output_reload. */ + rtx temp = gen_rtx_SUBREG (GET_MODE (operand1), + reg_equiv_mem [REGNO (SUBREG_REG (operand1))], + SUBREG_BYTE (operand1)); + operand1 = alter_subreg (&temp); + } + + if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM + && ((tem = find_replacement (&XEXP (operand0, 0))) + != XEXP (operand0, 0))) + operand0 = replace_equiv_address (operand0, tem); + + if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM + && ((tem = find_replacement (&XEXP (operand1, 0))) + != XEXP (operand1, 0))) + operand1 = replace_equiv_address (operand1, tem); + + /* Handle secondary reloads for loads/stores of FP registers from + REG+D addresses where D does not fit in 5 or 14 bits, including + (subreg (mem (addr))) cases. */ + if (scratch_reg + && fp_reg_operand (operand0, mode) + && ((GET_CODE (operand1) == MEM + && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode), + XEXP (operand1, 0))) + || ((GET_CODE (operand1) == SUBREG + && GET_CODE (XEXP (operand1, 0)) == MEM + && !memory_address_p ((GET_MODE_SIZE (mode) == 4 + ? SFmode : DFmode), + XEXP (XEXP (operand1, 0), 0)))))) + { + if (GET_CODE (operand1) == SUBREG) + operand1 = XEXP (operand1, 0); + + /* SCRATCH_REG will hold an address and maybe the actual data. We want + it in WORD_MODE regardless of what mode it was originally given + to us. */ + scratch_reg = force_mode (word_mode, scratch_reg); + + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. 
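+ (D is loaded first, then combined with the base, so that SCRATCH_REG ends up holding the complete address.)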
*/ + if (!memory_address_p (Pmode, XEXP (operand1, 0))) + { + emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); + emit_move_insn (scratch_reg, + gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)), + Pmode, + XEXP (XEXP (operand1, 0), 0), + scratch_reg)); + } + else + emit_move_insn (scratch_reg, XEXP (operand1, 0)); + emit_insn (gen_rtx_SET (VOIDmode, operand0, + replace_equiv_address (operand1, scratch_reg))); + return 1; + } + else if (scratch_reg + && fp_reg_operand (operand1, mode) + && ((GET_CODE (operand0) == MEM + && !memory_address_p ((GET_MODE_SIZE (mode) == 4 + ? SFmode : DFmode), + XEXP (operand0, 0))) + || ((GET_CODE (operand0) == SUBREG) + && GET_CODE (XEXP (operand0, 0)) == MEM + && !memory_address_p ((GET_MODE_SIZE (mode) == 4 + ? SFmode : DFmode), + XEXP (XEXP (operand0, 0), 0))))) + { + if (GET_CODE (operand0) == SUBREG) + operand0 = XEXP (operand0, 0); + + /* SCRATCH_REG will hold an address and maybe the actual data. We want + it in WORD_MODE regardless of what mode it was originally given + to us. */ + scratch_reg = force_mode (word_mode, scratch_reg); + + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. */ + if (!memory_address_p (Pmode, XEXP (operand0, 0))) + { + emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1)); + emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, + 0)), + Pmode, + XEXP (XEXP (operand0, 0), + 0), + scratch_reg)); + } + else + emit_move_insn (scratch_reg, XEXP (operand0, 0)); + emit_insn (gen_rtx_SET (VOIDmode, + replace_equiv_address (operand0, scratch_reg), + operand1)); + return 1; + } + /* Handle secondary reloads for loads of FP registers from constant + expressions by forcing the constant into memory. + + Use scratch_reg to hold the address of the memory location. + + The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return + NO_REGS when presented with a const_int and a register class + containing only FP registers. Doing so unfortunately creates + more problems than it solves. Fix this for 2.5. */ + else if (scratch_reg + && CONSTANT_P (operand1) + && fp_reg_operand (operand0, mode)) + { + rtx const_mem, xoperands[2]; + + /* SCRATCH_REG will hold an address and maybe the actual data. We want + it in WORD_MODE regardless of what mode it was originally given + to us. */ + scratch_reg = force_mode (word_mode, scratch_reg); + + /* Force the constant into memory and put the address of the + memory location into scratch_reg. */ + const_mem = force_const_mem (mode, operand1); + xoperands[0] = scratch_reg; + xoperands[1] = XEXP (const_mem, 0); + emit_move_sequence (xoperands, Pmode, 0); + + /* Now load the destination register. */ + emit_insn (gen_rtx_SET (mode, operand0, + replace_equiv_address (const_mem, scratch_reg))); + return 1; + } + /* Handle secondary reloads for SAR. These occur when trying to load + the SAR from memory or a constant. */ + else if (scratch_reg + && GET_CODE (operand0) == REG + && REGNO (operand0) < FIRST_PSEUDO_REGISTER + && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS + && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT)) + { + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. */ + if (GET_CODE (operand1) == MEM + && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0))) + { + /* We are reloading the address into the scratch register, so we + want to make sure the scratch register is a full register. 
*/ + scratch_reg = force_mode (word_mode, scratch_reg); + + emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); + emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, + 0)), + Pmode, + XEXP (XEXP (operand1, 0), + 0), + scratch_reg)); + + /* Now we are going to load the scratch register from memory, + we want to load it in the same width as the original MEM, + which must be the same as the width of the ultimate destination, + OPERAND0. */ + scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); + + emit_move_insn (scratch_reg, + replace_equiv_address (operand1, scratch_reg)); + } + else + { + /* We want to load the scratch register using the same mode as + the ultimate destination. */ + scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); + + emit_move_insn (scratch_reg, operand1); + } + + /* And emit the insn to set the ultimate destination. We know that + the scratch register has the same mode as the destination at this + point. */ + emit_move_insn (operand0, scratch_reg); + return 1; + } + /* Handle the most common case: storing into a register. */ + else if (register_operand (operand0, mode)) + { + /* Legitimize TLS symbol references. This happens for references + that aren't a legitimate constant. */ + if (PA_SYMBOL_REF_TLS_P (operand1)) + operand1 = legitimize_tls_address (operand1); + + if (register_operand (operand1, mode) + || (GET_CODE (operand1) == CONST_INT + && cint_ok_for_move (INTVAL (operand1))) + || (operand1 == CONST0_RTX (mode)) + || (GET_CODE (operand1) == HIGH + && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) + /* Only `general_operands' can come here, so MEM is ok. */ + || GET_CODE (operand1) == MEM) + { + /* Various sets are created during RTL generation which don't + have the REG_POINTER flag correctly set. After the CSE pass, + instruction recognition can fail if we don't consistently + set this flag when performing register copies. This should + also improve the opportunities for creating insns that use + unscaled indexing. */ + if (REG_P (operand0) && REG_P (operand1)) + { + if (REG_POINTER (operand1) + && !REG_POINTER (operand0) + && !HARD_REGISTER_P (operand0)) + copy_reg_pointer (operand0, operand1); + } + + /* When MEMs are broken out, the REG_POINTER flag doesn't + get set. In some cases, we can set the REG_POINTER flag + from the declaration for the MEM. */ + if (REG_P (operand0) + && GET_CODE (operand1) == MEM + && !REG_POINTER (operand0)) + { + tree decl = MEM_EXPR (operand1); + + /* Set the register pointer flag and register alignment + if the declaration for this memory reference is a + pointer type. */ + if (decl) + { + tree type; + + /* If this is a COMPONENT_REF, use the FIELD_DECL from + tree operand 1. */ + if (TREE_CODE (decl) == COMPONENT_REF) + decl = TREE_OPERAND (decl, 1); + + type = TREE_TYPE (decl); + type = strip_array_types (type); + + if (POINTER_TYPE_P (type)) + { + int align; + + type = TREE_TYPE (type); + /* Using TYPE_ALIGN_OK is rather conservative as + only the ada frontend actually sets it. */ + align = (TYPE_ALIGN_OK (type) ? 
TYPE_ALIGN (type) + : BITS_PER_UNIT); + mark_reg_pointer (operand0, align); + } + } + } + + emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); + return 1; + } + } + else if (GET_CODE (operand0) == MEM) + { + if (mode == DFmode && operand1 == CONST0_RTX (mode) + && !(reload_in_progress || reload_completed)) + { + rtx temp = gen_reg_rtx (DFmode); + + emit_insn (gen_rtx_SET (VOIDmode, temp, operand1)); + emit_insn (gen_rtx_SET (VOIDmode, operand0, temp)); + return 1; + } + if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) + { + /* Run this case quickly. */ + emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); + return 1; + } + if (! (reload_in_progress || reload_completed)) + { + operands[0] = validize_mem (operand0); + operands[1] = operand1 = force_reg (mode, operand1); + } + } + + /* Simplify the source if we need to. + Note we do have to handle function labels here, even though we do + not consider them legitimate constants. Loop optimizations can + call the emit_move_xxx with one as a source. */ + if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) + || function_label_operand (operand1, mode) + || (GET_CODE (operand1) == HIGH + && symbolic_operand (XEXP (operand1, 0), mode))) + { + int ishighonly = 0; + + if (GET_CODE (operand1) == HIGH) + { + ishighonly = 1; + operand1 = XEXP (operand1, 0); + } + if (symbolic_operand (operand1, mode)) + { + /* Argh. The assembler and linker can't handle arithmetic + involving plabels. + + So we force the plabel into memory, load operand0 from + the memory location, then add in the constant part. */ + if ((GET_CODE (operand1) == CONST + && GET_CODE (XEXP (operand1, 0)) == PLUS + && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode)) + || function_label_operand (operand1, mode)) + { + rtx temp, const_part; + + /* Figure out what (if any) scratch register to use. */ + if (reload_in_progress || reload_completed) + { + scratch_reg = scratch_reg ? scratch_reg : operand0; + /* SCRATCH_REG will hold an address and maybe the actual + data. We want it in WORD_MODE regardless of what mode it + was originally given to us. */ + scratch_reg = force_mode (word_mode, scratch_reg); + } + else if (flag_pic) + scratch_reg = gen_reg_rtx (Pmode); + + if (GET_CODE (operand1) == CONST) + { + /* Save away the constant part of the expression. */ + const_part = XEXP (XEXP (operand1, 0), 1); + gcc_assert (GET_CODE (const_part) == CONST_INT); + + /* Force the function label into memory. */ + temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); + } + else + { + /* No constant part. */ + const_part = NULL_RTX; + + /* Force the function label into memory. */ + temp = force_const_mem (mode, operand1); + } + + + /* Get the address of the memory location. PIC-ify it if + necessary. */ + temp = XEXP (temp, 0); + if (flag_pic) + temp = legitimize_pic_address (temp, mode, scratch_reg); + + /* Put the address of the memory location into our destination + register. */ + operands[1] = temp; + emit_move_sequence (operands, mode, scratch_reg); + + /* Now load from the memory location into our destination + register. */ + operands[1] = gen_rtx_MEM (Pmode, operands[0]); + emit_move_sequence (operands, mode, scratch_reg); + + /* And add back in the constant part. */ + if (const_part != NULL_RTX) + expand_inc (operand0, const_part); + + return 1; + } + + if (flag_pic) + { + rtx temp; + + if (reload_in_progress || reload_completed) + { + temp = scratch_reg ? 
scratch_reg : operand0; + /* TEMP will hold an address and maybe the actual + data. We want it in WORD_MODE regardless of what mode it + was originally given to us. */ + temp = force_mode (word_mode, temp); + } + else + temp = gen_reg_rtx (Pmode); + + /* (const (plus (symbol) (const_int))) must be forced to + memory during/after reload if the const_int will not fit + in 14 bits. */ + if (GET_CODE (operand1) == CONST + && GET_CODE (XEXP (operand1, 0)) == PLUS + && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT + && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)) + && (reload_completed || reload_in_progress) + && flag_pic) + { + rtx const_mem = force_const_mem (mode, operand1); + operands[1] = legitimize_pic_address (XEXP (const_mem, 0), + mode, temp); + operands[1] = replace_equiv_address (const_mem, operands[1]); + emit_move_sequence (operands, mode, temp); + } + else + { + operands[1] = legitimize_pic_address (operand1, mode, temp); + if (REG_P (operand0) && REG_P (operands[1])) + copy_reg_pointer (operand0, operands[1]); + emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1])); + } + } + /* On the HPPA, references to data space are supposed to use dp, + register 27, but showing it in the RTL inhibits various cse + and loop optimizations. */ + else + { + rtx temp, set; + + if (reload_in_progress || reload_completed) + { + temp = scratch_reg ? scratch_reg : operand0; + /* TEMP will hold an address and maybe the actual + data. We want it in WORD_MODE regardless of what mode it + was originally given to us. */ + temp = force_mode (word_mode, temp); + } + else + temp = gen_reg_rtx (mode); + + /* Loading a SYMBOL_REF into a register makes that register + safe to be used as the base in an indexed address. + + Don't mark hard registers though. That loses. */ + if (GET_CODE (operand0) == REG + && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) + mark_reg_pointer (operand0, BITS_PER_UNIT); + if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) + mark_reg_pointer (temp, BITS_PER_UNIT); + + if (ishighonly) + set = gen_rtx_SET (mode, operand0, temp); + else + set = gen_rtx_SET (VOIDmode, + operand0, + gen_rtx_LO_SUM (mode, temp, operand1)); + + emit_insn (gen_rtx_SET (VOIDmode, + temp, + gen_rtx_HIGH (mode, operand1))); + emit_insn (set); + + } + return 1; + } + else if (pa_tls_referenced_p (operand1)) + { + rtx tmp = operand1; + rtx addend = NULL; + + if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) + { + addend = XEXP (XEXP (tmp, 0), 1); + tmp = XEXP (XEXP (tmp, 0), 0); + } + + gcc_assert (GET_CODE (tmp) == SYMBOL_REF); + tmp = legitimize_tls_address (tmp); + if (addend) + { + tmp = gen_rtx_PLUS (mode, tmp, addend); + tmp = force_operand (tmp, operands[0]); + } + operands[1] = tmp; + } + else if (GET_CODE (operand1) != CONST_INT + || !cint_ok_for_move (INTVAL (operand1))) + { + rtx insn, temp; + rtx op1 = operand1; + HOST_WIDE_INT value = 0; + HOST_WIDE_INT insv = 0; + int insert = 0; + + if (GET_CODE (operand1) == CONST_INT) + value = INTVAL (operand1); + + if (TARGET_64BIT + && GET_CODE (operand1) == CONST_INT + && HOST_BITS_PER_WIDE_INT > 32 + && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32) + { + HOST_WIDE_INT nval; + + /* Extract the low order 32 bits of the value and sign extend. + If the new value is the same as the original value, we can + use the original value as-is. If the new value is + different, we use it and insert the most-significant 32 bits + of the original value into the final result.
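*/
+
+/* Illustrative sketch (editorial, not part of upstream pa.c): the
+   sign-extension idiom used for NVAL below, written as standalone C.
+   The LL constants stand in for HOST_WIDE_INT on a 64-bit host.  */
+#if 0
+static long long
+sext_low32 (long long value)
+{
+  /* Keep the low 32 bits, then sign-extend bit 31.  */
+  return ((value & ((2LL << 31) - 1)) ^ (1LL << 31)) - (1LL << 31);
+}
+/* E.g., sext_low32 (0x123456789LL) == 0x23456789LL, which differs from
+   the original value, so the upper 32 bits (0x1) must be reinserted;
+   sext_low32 (0x23456789LL) returns its argument unchanged.  */
+#endif
+
+/*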
*/ + nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1)) + ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31); + if (value != nval) + { +#if HOST_BITS_PER_WIDE_INT > 32 + insv = value >= 0 ? value >> 32 : ~(~value >> 32); +#endif + insert = 1; + value = nval; + operand1 = GEN_INT (nval); + } + } + + if (reload_in_progress || reload_completed) + temp = scratch_reg ? scratch_reg : operand0; + else + temp = gen_reg_rtx (mode); + + /* We don't directly split DImode constants on 32-bit targets + because PLUS uses an 11-bit immediate and the insn sequence + generated is not as efficient as the one using HIGH/LO_SUM. */ + if (GET_CODE (operand1) == CONST_INT + && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD + && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT + && !insert) + { + /* Directly break constant into high and low parts. This + provides better optimization opportunities because various + passes recognize constants split with PLUS but not LO_SUM. + We use a 14-bit signed low part except when the addition + of 0x4000 to the high part might change the sign of the + high part. */ + HOST_WIDE_INT low = value & 0x3fff; + HOST_WIDE_INT high = value & ~ 0x3fff; + + if (low >= 0x2000) + { + if (high == 0x7fffc000 || (mode == HImode && high == 0x4000)) + high += 0x2000; + else + high += 0x4000; + } + + low = value - high; + + emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high))); + operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low)); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, temp, + gen_rtx_HIGH (mode, operand1))); + operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); + } + + insn = emit_move_insn (operands[0], operands[1]); + + /* Now insert the most significant 32 bits of the value + into the register. When we don't have a second register + available, it could take up to nine instructions to load + a 64-bit integer constant. Prior to reload, we force + constants that would take more than three instructions + to load to the constant pool. During and after reload, + we have to handle all possible values. */ + if (insert) + { + /* Use a HIGH/LO_SUM/INSV sequence if we have a second + register and the value to be inserted is outside the + range that can be loaded with three depdi instructions. */ + if (temp != operand0 && (insv >= 16384 || insv < -16384)) + { + operand1 = GEN_INT (insv); + + emit_insn (gen_rtx_SET (VOIDmode, temp, + gen_rtx_HIGH (mode, operand1))); + emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1)); + emit_insn (gen_insv (operand0, GEN_INT (32), + const0_rtx, temp)); + } + else + { + int len = 5, pos = 27; + + /* Insert the bits using the depdi instruction. */ + while (pos >= 0) + { + HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16; + HOST_WIDE_INT sign = v5 < 0; + + /* Left extend the insertion. */ + insv = (insv >= 0 ? insv >> len : ~(~insv >> len)); + while (pos > 0 && (insv & 1) == sign) + { + insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1)); + len += 1; + pos -= 1; + } + + emit_insn (gen_insv (operand0, GEN_INT (len), + GEN_INT (pos), GEN_INT (v5))); + + len = pos > 0 && pos < 5 ? pos : 5; + pos -= len; + } + } + } + + set_unique_reg_note (insn, REG_EQUAL, op1); + + return 1; + } + } + /* Now have insn-emit do whatever it normally does. */ + return 0; +} + +/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning + it will need a link/runtime reloc). 
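+
+   For example (hypothetical initializers, added for illustration): a
+   static initializer such as  void *p = &foo;  reaches here as an
+   ADDR_EXPR and makes this function return 1, while  int i = 42;
+   contains no ADDR_EXPR and yields 0.  An offset form like  &foo + 4
+   is still 1, via the PLUS_EXPR/POINTER_PLUS_EXPR recursion below.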
*/ + +int +reloc_needed (tree exp) +{ + int reloc = 0; + + switch (TREE_CODE (exp)) + { + case ADDR_EXPR: + return 1; + + case POINTER_PLUS_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + reloc = reloc_needed (TREE_OPERAND (exp, 0)); + reloc |= reloc_needed (TREE_OPERAND (exp, 1)); + break; + + CASE_CONVERT: + case NON_LVALUE_EXPR: + reloc = reloc_needed (TREE_OPERAND (exp, 0)); + break; + + case CONSTRUCTOR: + { + tree value; + unsigned HOST_WIDE_INT ix; + + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value) + if (value) + reloc |= reloc_needed (value); + } + break; + + case ERROR_MARK: + break; + + default: + break; + } + return reloc; +} + +/* Does operand (which is a symbolic_operand) live in text space? + If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info, + will be true. */ + +int +read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (GET_CODE (operand) == CONST) + operand = XEXP (XEXP (operand, 0), 0); + if (flag_pic) + { + if (GET_CODE (operand) == SYMBOL_REF) + return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand); + } + else + { + if (GET_CODE (operand) == SYMBOL_REF) + return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand); + } + return 1; +} + + +/* Return the best assembler insn template + for moving operands[1] into operands[0] as a fullword. */ +const char * +singlemove_string (rtx *operands) +{ + HOST_WIDE_INT intval; + + if (GET_CODE (operands[0]) == MEM) + return "stw %r1,%0"; + if (GET_CODE (operands[1]) == MEM) + return "ldw %1,%0"; + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + long i; + REAL_VALUE_TYPE d; + + gcc_assert (GET_MODE (operands[1]) == SFmode); + + /* Translate the CONST_DOUBLE to a CONST_INT with the same target + bit pattern. */ + REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (d, i); + + operands[1] = GEN_INT (i); + /* Fall through to CONST_INT case. */ + } + if (GET_CODE (operands[1]) == CONST_INT) + { + intval = INTVAL (operands[1]); + + if (VAL_14_BITS_P (intval)) + return "ldi %1,%0"; + else if ((intval & 0x7ff) == 0) + return "ldil L'%1,%0"; + else if (zdepi_cint_p (intval)) + return "{zdepi %Z1,%0|depwi,z %Z1,%0}"; + else + return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; + } + return "copy %1,%0"; +} + + +/* Compute position (in OP[1]) and width (in OP[2]) + useful for copying IMM to a register using the zdepi + instructions. Store the immediate value to insert in OP[0]. */ +static void +compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) +{ + int lsb, len; + + /* Find the least significant set bit in IMM. */ + for (lsb = 0; lsb < 32; lsb++) + { + if ((imm & 1) != 0) + break; + imm >>= 1; + } + + /* Choose variants based on *sign* of the 5-bit field. */ + if ((imm & 0x10) == 0) + len = (lsb <= 28) ? 4 : 32 - lsb; + else + { + /* Find the width of the bitstring in IMM. */ + for (len = 5; len < 32 - lsb; len++) + { + if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) + break; + } + + /* Sign extend IMM as a 5-bit value. */ + imm = (imm & 0xf) - 0x10; + } + + op[0] = imm; + op[1] = 31 - lsb; + op[2] = len; +} + +/* Compute position (in OP[1]) and width (in OP[2]) + useful for copying IMM to a register using the depdi,z + instructions. Store the immediate value to insert in OP[0]. */ +void +compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) +{ + int lsb, len, maxlen; + + maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64); + + /* Find the least significant set bit in IMM. 
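+
+     (Worked example, added for illustration, using the 32-bit variant
+     above: imm = 0x00fe0000 has its least significant set bit at
+     position 17 and a run of 7 ones, so compute_zdepwi_operands
+     produces op[0] = -1, op[1] = 31 - 17 = 14, op[2] = 7.)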
*/ + for (lsb = 0; lsb < maxlen; lsb++) + { + if ((imm & 1) != 0) + break; + imm >>= 1; + } + + /* Choose variants based on *sign* of the 5-bit field. */ + if ((imm & 0x10) == 0) + len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb; + else + { + /* Find the width of the bitstring in IMM. */ + for (len = 5; len < maxlen - lsb; len++) + { + if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) + break; + } + + /* Extend length if host is narrow and IMM is negative. */ + if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb) + len += 32; + + /* Sign extend IMM as a 5-bit value. */ + imm = (imm & 0xf) - 0x10; + } + + op[0] = imm; + op[1] = 63 - lsb; + op[2] = len; +} + +/* Output assembler code to perform a doubleword move insn + with operands OPERANDS. */ + +const char * +output_move_double (rtx *operands) +{ + enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; + rtx latehalf[2]; + rtx addreg0 = 0, addreg1 = 0; + + /* First classify both operands. */ + + if (REG_P (operands[0])) + optype0 = REGOP; + else if (offsettable_memref_p (operands[0])) + optype0 = OFFSOP; + else if (GET_CODE (operands[0]) == MEM) + optype0 = MEMOP; + else + optype0 = RNDOP; + + if (REG_P (operands[1])) + optype1 = REGOP; + else if (CONSTANT_P (operands[1])) + optype1 = CNSTOP; + else if (offsettable_memref_p (operands[1])) + optype1 = OFFSOP; + else if (GET_CODE (operands[1]) == MEM) + optype1 = MEMOP; + else + optype1 = RNDOP; + + /* Check for the cases that the operand constraints are not + supposed to allow to happen. */ + gcc_assert (optype0 == REGOP || optype1 == REGOP); + + /* Handle copies between general and floating registers. */ + + if (optype0 == REGOP && optype1 == REGOP + && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])) + { + if (FP_REG_P (operands[0])) + { + output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands); + output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands); + return "{fldds|fldd} -16(%%sp),%0"; + } + else + { + output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands); + output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands); + return "{ldws|ldw} -12(%%sp),%R0"; + } + } + + /* Handle auto decrementing and incrementing loads and stores + specifically, since the structure of the function doesn't work + for them without major modification. Do it better when we learn + this port about the general inc/dec addressing of PA. + (This was written by tege. Chide him if it doesn't work.) */ + + if (optype0 == MEMOP) + { + /* We have to output the address syntax ourselves, since print_operand + doesn't deal with the addresses we want to use. Fix this later. */ + + rtx addr = XEXP (operands[0], 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); + + operands[0] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[1]) == REG + && GET_CODE (operands[0]) == REG); + + gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); + + /* No overlap between high target register and address + register. 
(We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == POST_INC) + return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)"; + return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)"; + } + else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); + + operands[0] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[1]) == REG + && GET_CODE (operands[0]) == REG); + + gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); + /* No overlap between high target register and address + register. (We do this in a non-obvious way to save a + register file writeback) */ + if (GET_CODE (addr) == PRE_INC) + return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)"; + return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)"; + } + } + if (optype1 == MEMOP) + { + /* We have to output the address syntax ourselves, since print_operand + doesn't deal with the addresses we want to use. Fix this later. */ + + rtx addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + operands[1] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[0]) == REG + && GET_CODE (operands[1]) == REG); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. (We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == POST_INC) + return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0"; + return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0"; + } + else + { + /* This is an undefined situation. We should load into the + address register *and* update that register. Probably + we don't need to handle this at all. */ + if (GET_CODE (addr) == POST_INC) + return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0"; + return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0"; + } + } + else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + operands[1] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[0]) == REG + && GET_CODE (operands[1]) == REG); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. (We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == PRE_INC) + return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0"; + return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0"; + } + else + { + /* This is an undefined situation. We should load into the + address register *and* update that register. Probably + we don't need to handle this at all. 
*/ + if (GET_CODE (addr) == PRE_INC) + return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0"; + return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0"; + } + } + else if (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == MULT) + { + rtx xoperands[4]; + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + xoperands[0] = high_reg; + xoperands[1] = XEXP (addr, 1); + xoperands[2] = XEXP (XEXP (addr, 0), 0); + xoperands[3] = XEXP (XEXP (addr, 0), 1); + output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}", + xoperands); + return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; + } + else + { + xoperands[0] = high_reg; + xoperands[1] = XEXP (addr, 1); + xoperands[2] = XEXP (XEXP (addr, 0), 0); + xoperands[3] = XEXP (XEXP (addr, 0), 1); + output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}", + xoperands); + return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0"; + } + } + } + + /* If an operand is an unoffsettable memory ref, find a register + we can increment temporarily to make it refer to the second word. */ + + if (optype0 == MEMOP) + addreg0 = find_addr_reg (XEXP (operands[0], 0)); + + if (optype1 == MEMOP) + addreg1 = find_addr_reg (XEXP (operands[1], 0)); + + /* Ok, we can do one word at a time. + Normally we do the low-numbered word first. + + In either case, set up in LATEHALF the operands to use + for the high-numbered word and in some cases alter the + operands in OPERANDS to be suitable for the low-numbered word. */ + + if (optype0 == REGOP) + latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + else if (optype0 == OFFSOP) + latehalf[0] = adjust_address (operands[0], SImode, 4); + else + latehalf[0] = operands[0]; + + if (optype1 == REGOP) + latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); + else if (optype1 == OFFSOP) + latehalf[1] = adjust_address (operands[1], SImode, 4); + else if (optype1 == CNSTOP) + split_double (operands[1], &operands[1], &latehalf[1]); + else + latehalf[1] = operands[1]; + + /* If the first move would clobber the source of the second one, + do them in the other order. + + This can happen in two cases: + + mem -> register where the first half of the destination register + is the same register used in the memory's address. Reload + can create such insns. + + mem in this case will be either register indirect or register + indirect plus a valid offset. + + register -> register move where REGNO(dst) == REGNO(src + 1) + someone (Tim/Tege?) claimed this can happen for parameter loads. + + Handle mem -> register case first. */ + if (optype0 == REGOP + && (optype1 == MEMOP || optype1 == OFFSOP) + && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, + operands[1], 0)) + { + /* Do the late half first. */ + if (addreg1) + output_asm_insn ("ldo 4(%0),%0", &addreg1); + output_asm_insn (singlemove_string (latehalf), latehalf); + + /* Then clobber. */ + if (addreg1) + output_asm_insn ("ldo -4(%0),%0", &addreg1); + return singlemove_string (operands); + } + + /* Now handle register -> register case. */ + if (optype0 == REGOP && optype1 == REGOP + && REGNO (operands[0]) == REGNO (operands[1]) + 1) + { + output_asm_insn (singlemove_string (latehalf), latehalf); + return singlemove_string (operands); + } + + /* Normal case: do the two words, low-numbered first. */ + + output_asm_insn (singlemove_string (operands), operands); + + /* Make any unoffsettable addresses point at high-numbered word. 
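+
+     (Concretely, this emits  ldo 4(%0),%0  before the second-word
+     move and  ldo -4(%0),%0  afterwards to restore the address
+     register, as the code just below shows.)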
*/ + if (addreg0) + output_asm_insn ("ldo 4(%0),%0", &addreg0); + if (addreg1) + output_asm_insn ("ldo 4(%0),%0", &addreg1); + + /* Do that word. */ + output_asm_insn (singlemove_string (latehalf), latehalf); + + /* Undo the adds we just did. */ + if (addreg0) + output_asm_insn ("ldo -4(%0),%0", &addreg0); + if (addreg1) + output_asm_insn ("ldo -4(%0),%0", &addreg1); + + return ""; +} + +const char * +output_fp_move_double (rtx *operands) +{ + if (FP_REG_P (operands[0])) + { + if (FP_REG_P (operands[1]) + || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) + output_asm_insn ("fcpy,dbl %f1,%0", operands); + else + output_asm_insn ("fldd%F1 %1,%0", operands); + } + else if (FP_REG_P (operands[1])) + { + output_asm_insn ("fstd%F0 %1,%0", operands); + } + else + { + rtx xoperands[2]; + + gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0]))); + + /* This is a pain. You have to be prepared to deal with an + arbitrary address here including pre/post increment/decrement. + + so avoid this in the MD. */ + gcc_assert (GET_CODE (operands[0]) == REG); + + xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + xoperands[0] = operands[0]; + output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); + } + return ""; +} + +/* Return a REG that occurs in ADDR with coefficient 1. + ADDR can be effectively incremented by incrementing REG. */ + +static rtx +find_addr_reg (rtx addr) +{ + while (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 0)) == REG) + addr = XEXP (addr, 0); + else if (GET_CODE (XEXP (addr, 1)) == REG) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 0))) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 1))) + addr = XEXP (addr, 0); + else + gcc_unreachable (); + } + gcc_assert (GET_CODE (addr) == REG); + return addr; +} + +/* Emit code to perform a block move. + + OPERANDS[0] is the destination pointer as a REG, clobbered. + OPERANDS[1] is the source pointer as a REG, clobbered. + OPERANDS[2] is a register for temporary storage. + OPERANDS[3] is a register for temporary storage. + OPERANDS[4] is the size as a CONST_INT + OPERANDS[5] is the alignment safe to use, as a CONST_INT. + OPERANDS[6] is another temporary register. */ + +const char * +output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) +{ + int align = INTVAL (operands[5]); + unsigned long n_bytes = INTVAL (operands[4]); + + /* We can't move more than a word at a time because the PA + has no longer integer move insns. (Could use fp mem ops?) */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* Note that we know each loop below will execute at least twice + (else we would have open-coded the copy). */ + switch (align) + { + case 8: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 16); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("ldd,ma 8(%1),%3", operands); + output_asm_insn ("ldd,ma 8(%1),%6", operands); + output_asm_insn ("std,ma %3,8(%0)", operands); + output_asm_insn ("addib,>= -16,%2,.-12", operands); + output_asm_insn ("std,ma %6,8(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! 
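+
+	 (Worked example, added for illustration: n_bytes = 43 with
+	 8-byte alignment copies 32 bytes in the loop, leaving an
+	 11-byte residual; one more ldd/std pair moves 8 bytes, and
+	 stdby,e %6,3(%0)  stores the final 3.)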
*/ + if (n_bytes % 16 != 0) + { + operands[4] = GEN_INT (n_bytes % 8); + if (n_bytes % 16 >= 8) + output_asm_insn ("ldd,ma 8(%1),%3", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("ldd 0(%1),%6", operands); + if (n_bytes % 16 >= 8) + output_asm_insn ("std,ma %3,8(%0)", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("stdby,e %6,%4(%0)", operands); + } + return ""; + + case 4: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 8); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); + output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands); + output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); + output_asm_insn ("addib,>= -8,%2,.-12", operands); + output_asm_insn ("{stws|stw},ma %6,4(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! */ + if (n_bytes % 8 != 0) + { + operands[4] = GEN_INT (n_bytes % 4); + if (n_bytes % 8 >= 4) + output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("ldw 0(%1),%6", operands); + if (n_bytes % 8 >= 4) + output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands); + } + return ""; + + case 2: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 4); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); + output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands); + output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); + output_asm_insn ("addib,>= -4,%2,.-12", operands); + output_asm_insn ("{sths|sth},ma %6,2(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 4 != 0) + { + if (n_bytes % 4 >= 2) + output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("ldb 0(%1),%6", operands); + if (n_bytes % 4 >= 2) + output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("stb %6,0(%0)", operands); + } + return ""; + + case 1: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 2); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands); + output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands); + output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands); + output_asm_insn ("addib,>= -2,%2,.-12", operands); + output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 2 != 0) + { + output_asm_insn ("ldb 0(%1),%3", operands); + output_asm_insn ("stb %3,0(%0)", operands); + } + return ""; + + default: + gcc_unreachable (); + } +} + +/* Count the number of insns necessary to handle this block move. + + Basic structure is the same as emit_block_move, except that we + count insns rather than emit them. */ + +static int +compute_movmem_length (rtx insn) +{ + rtx pat = PATTERN (insn); + unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); + unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); + unsigned int n_insns = 0; + + /* We can't move more than four bytes at a time because the PA + has no longer integer move insns. (Could use fp mem ops?) */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* The basic copying loop. */ + n_insns = 6; + + /* Residuals. 
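+
+     (Example, added for illustration: for the 43-byte, 8-byte-aligned
+     copy described above, both residual tests fire, so
+     n_insns = 6 + 2 + 2 = 10 and the returned length is 40 bytes.)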
*/ + if (n_bytes % (2 * align) != 0) + { + if ((n_bytes % (2 * align)) >= align) + n_insns += 2; + + if ((n_bytes % align) != 0) + n_insns += 2; + } + + /* Lengths are expressed in bytes now; each insn is 4 bytes. */ + return n_insns * 4; +} + +/* Emit code to perform a block clear. + + OPERANDS[0] is the destination pointer as a REG, clobbered. + OPERANDS[1] is a register for temporary storage. + OPERANDS[2] is the size as a CONST_INT + OPERANDS[3] is the alignment safe to use, as a CONST_INT. */ + +const char * +output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) +{ + int align = INTVAL (operands[3]); + unsigned long n_bytes = INTVAL (operands[2]); + + /* We can't clear more than a word at a time because the PA + has no longer integer move insns. */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* Note that we know each loop below will execute at least twice + (else we would have open-coded the clear). */ + switch (align) + { + case 8: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 16); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("std,ma %%r0,8(%0)", operands); + output_asm_insn ("addib,>= -16,%1,.-4", operands); + output_asm_insn ("std,ma %%r0,8(%0)", operands); + + /* Handle the residual. There could be up to 15 bytes of + residual to clear! */ + if (n_bytes % 16 != 0) + { + operands[2] = GEN_INT (n_bytes % 8); + if (n_bytes % 16 >= 8) + output_asm_insn ("std,ma %%r0,8(%0)", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("stdby,e %%r0,%2(%0)", operands); + } + return ""; + + case 4: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 8); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + output_asm_insn ("addib,>= -8,%1,.-4", operands); + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to clear! */ + if (n_bytes % 8 != 0) + { + operands[2] = GEN_INT (n_bytes % 4); + if (n_bytes % 8 >= 4) + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands); + } + return ""; + + case 2: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 4); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + output_asm_insn ("addib,>= -4,%1,.-4", operands); + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 4 != 0) + { + if (n_bytes % 4 >= 2) + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("stb %%r0,0(%0)", operands); + } + return ""; + + case 1: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 2); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); + output_asm_insn ("addib,>= -2,%1,.-4", operands); + output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 2 != 0) + output_asm_insn ("stb %%r0,0(%0)", operands); + + return ""; + + default: + gcc_unreachable (); + } +} + +/* Count the number of insns necessary to handle this block clear. + + Basic structure is the same as output_block_clear, except that we + count insns rather than emit them.
*/ + +static int +compute_clrmem_length (rtx insn) +{ + rtx pat = PATTERN (insn); + unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0)); + unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0)); + unsigned int n_insns = 0; + + /* We can't clear more than a word at a time because the PA + has no longer integer move insns. */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* The basic loop. */ + n_insns = 4; + + /* Residuals. */ + if (n_bytes % (2 * align) != 0) + { + if ((n_bytes % (2 * align)) >= align) + n_insns++; + + if ((n_bytes % align) != 0) + n_insns++; + } + + /* Lengths are expressed in bytes now; each insn is 4 bytes. */ + return n_insns * 4; +} + + +const char * +output_and (rtx *operands) +{ + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) + { + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int ls0, ls1, ms0, p, len; + + for (ls0 = 0; ls0 < 32; ls0++) + if ((mask & (1 << ls0)) == 0) + break; + + for (ls1 = ls0; ls1 < 32; ls1++) + if ((mask & (1 << ls1)) != 0) + break; + + for (ms0 = ls1; ms0 < 32; ms0++) + if ((mask & (1 << ms0)) == 0) + break; + + gcc_assert (ms0 == 32); + + if (ls1 == 32) + { + len = ls0; + + gcc_assert (len); + + operands[2] = GEN_INT (len); + return "{extru|extrw,u} %1,31,%2,%0"; + } + else + { + /* We could use this `depi' for the case above as well, but `depi' + requires one more register file access than an `extru'. */ + + p = 31 - ls0; + len = ls1 - ls0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "{depi|depwi} 0,%2,%3,%0"; + } + } + else + return "and %1,%2,%0"; +} + +/* Return a string to perform a bitwise-and of operands[1] with operands[2] + storing the result in operands[0]. */ +const char * +output_64bit_and (rtx *operands) +{ + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) + { + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int ls0, ls1, ms0, p, len; + + for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0) + break; + + for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0) + break; + + for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0) + break; + + gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT); + + if (ls1 == HOST_BITS_PER_WIDE_INT) + { + len = ls0; + + gcc_assert (len); + + operands[2] = GEN_INT (len); + return "extrd,u %1,63,%2,%0"; + } + else + { + /* We could use this `depdi' for the case above as well, but `depdi' + requires one more register file access than an `extrd'. */ + + p = 63 - ls0; + len = ls1 - ls0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "depdi 0,%2,%3,%0"; + } + } + else + return "and %1,%2,%0"; +} + +const char * +output_ior (rtx *operands) +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int bs0, bs1, p, len; + + if (INTVAL (operands[2]) == 0) + return "copy %1,%0"; + + for (bs0 = 0; bs0 < 32; bs0++) + if ((mask & (1 << bs0)) != 0) + break; + + for (bs1 = bs0; bs1 < 32; bs1++) + if ((mask & (1 << bs1)) == 0) + break; + + gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); + + p = 31 - bs0; + len = bs1 - bs0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "{depi|depwi} -1,%2,%3,%0"; +} + +/* Return a string to perform a bitwise-ior of operands[1] with operands[2] + storing the result in operands[0].
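+
+   (Worked example, added for illustration, using the 32-bit variant
+   above: a mask of 0x00ff0000 gives bs0 = 16 and bs1 = 24, hence
+   p = 15 and len = 8, and the returned template renders as
+   depwi -1,15,8,%0  in one assembler dialect.)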
*/ +const char * +output_64bit_ior (rtx *operands) +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int bs0, bs1, p, len; + + if (INTVAL (operands[2]) == 0) + return "copy %1,%0"; + + for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0) + break; + + for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0) + break; + + gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT + || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); + + p = 63 - bs0; + len = bs1 - bs0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "depdi -1,%2,%3,%0"; +} + +/* Target hook for assembling integer objects. This code handles + aligned SI and DI integers specially since function references + must be preceded by P%. */ + +static bool +pa_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + if (size == UNITS_PER_WORD + && aligned_p + && function_label_operand (x, VOIDmode)) + { + fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file); + output_addr_const (asm_out_file, x); + fputc ('\n', asm_out_file); + return true; + } + return default_assemble_integer (x, size, aligned_p); +} + +/* Output an ascii string. */ +void +output_ascii (FILE *file, const char *p, int size) +{ + int i; + int chars_output; + unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ + + /* The HP assembler can only take strings of 256 characters at one + time. This is a limitation on input line length, *not* the + length of the string. Sigh. Even worse, it seems that the + restriction is in number of input characters (see \xnn & + \whatever). So we have to do this very carefully. */ + + fputs ("\t.STRING \"", file); + + chars_output = 0; + for (i = 0; i < size; i += 4) + { + int co = 0; + int io = 0; + for (io = 0, co = 0; io < MIN (4, size - i); io++) + { + register unsigned int c = (unsigned char) p[i + io]; + + if (c == '\"' || c == '\\') + partial_output[co++] = '\\'; + if (c >= ' ' && c < 0177) + partial_output[co++] = c; + else + { + unsigned int hexd; + partial_output[co++] = '\\'; + partial_output[co++] = 'x'; + hexd = c / 16 - 0 + '0'; + if (hexd > '9') + hexd -= '9' - 'a' + 1; + partial_output[co++] = hexd; + hexd = c % 16 - 0 + '0'; + if (hexd > '9') + hexd -= '9' - 'a' + 1; + partial_output[co++] = hexd; + } + } + if (chars_output + co > 243) + { + fputs ("\"\n\t.STRING \"", file); + chars_output = 0; + } + fwrite (partial_output, 1, (size_t) co, file); + chars_output += co; + co = 0; + } + fputs ("\"\n", file); +} + +/* Try to rewrite floating point comparisons & branches to avoid + useless add,tr insns. + + CHECK_NOTES is nonzero if we should examine REG_DEAD notes + to see if FPCC is dead. CHECK_NOTES is nonzero for the + first attempt to remove useless add,tr insns. It is zero + for the second pass as reorg sometimes leaves bogus REG_DEAD + notes lying around. + + When CHECK_NOTES is zero we can only eliminate add,tr insns + when there's a 1:1 correspondence between fcmp and ftest/fbranch + instructions. */ +static void +remove_useless_addtr_insns (int check_notes) +{ + rtx insn; + static int pass = 0; + + /* This is fairly cheap, so always run it when optimizing. */ + if (optimize > 0) + { + int fcmp_count = 0; + int fbranch_count = 0; + + /* Walk all the insns in this function looking for fcmp & fbranch + instructions. Keep track of how many of each we find. 
*/ + for (insn = get_insns (); insn; insn = next_insn (insn)) + { + rtx tmp; + + /* Ignore anything that isn't an INSN or a JUMP_INSN. */ + if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN) + continue; + + tmp = PATTERN (insn); + + /* It must be a set. */ + if (GET_CODE (tmp) != SET) + continue; + + /* If the destination is CCFP, then we've found an fcmp insn. */ + tmp = SET_DEST (tmp); + if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) + { + fcmp_count++; + continue; + } + + tmp = PATTERN (insn); + /* If this is an fbranch instruction, bump the fbranch counter. */ + if (GET_CODE (tmp) == SET + && SET_DEST (tmp) == pc_rtx + && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) + { + fbranch_count++; + continue; + } + } + + + /* Find all floating point compare + branch insns. If possible, + reverse the comparison & the branch to avoid add,tr insns. */ + for (insn = get_insns (); insn; insn = next_insn (insn)) + { + rtx tmp, next; + + /* Ignore anything that isn't an INSN. */ + if (GET_CODE (insn) != INSN) + continue; + + tmp = PATTERN (insn); + + /* It must be a set. */ + if (GET_CODE (tmp) != SET) + continue; + + /* The destination must be CCFP, which is register zero. */ + tmp = SET_DEST (tmp); + if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) + continue; + + /* INSN should be a set of CCFP. + + See if the result of this insn is used in a reversed FP + conditional branch. If so, reverse our condition and + the branch. Doing so avoids useless add,tr insns. */ + next = next_insn (insn); + while (next) + { + /* Jumps, calls and labels stop our search. */ + if (GET_CODE (next) == JUMP_INSN + || GET_CODE (next) == CALL_INSN + || GET_CODE (next) == CODE_LABEL) + break; + + /* As does another fcmp insn. */ + if (GET_CODE (next) == INSN + && GET_CODE (PATTERN (next)) == SET + && GET_CODE (SET_DEST (PATTERN (next))) == REG + && REGNO (SET_DEST (PATTERN (next))) == 0) + break; + + next = next_insn (next); + } + + /* Is NEXT_INSN a branch? */ + if (next + && GET_CODE (next) == JUMP_INSN) + { + rtx pattern = PATTERN (next); + + /* If it a reversed fp conditional branch (e.g. uses add,tr) + and CCFP dies, then reverse our conditional and the branch + to avoid the add,tr. */ + if (GET_CODE (pattern) == SET + && SET_DEST (pattern) == pc_rtx + && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 + && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC + && (fcmp_count == fbranch_count + || (check_notes + && find_regno_note (next, REG_DEAD, 0)))) + { + /* Reverse the branch. */ + tmp = XEXP (SET_SRC (pattern), 1); + XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); + XEXP (SET_SRC (pattern), 2) = tmp; + INSN_CODE (next) = -1; + + /* Reverse our condition. */ + tmp = PATTERN (insn); + PUT_CODE (XEXP (tmp, 1), + (reverse_condition_maybe_unordered + (GET_CODE (XEXP (tmp, 1))))); + } + } + } + } + + pass = !pass; + +} + +/* You may have trouble believing this, but this is the 32 bit HP-PA + stack layout. Wow. 
+ + Offset Contents + + Variable arguments (optional; any number may be allocated) + + SP-(4*(N+9)) arg word N + : : + SP-56 arg word 5 + SP-52 arg word 4 + + Fixed arguments (must be allocated; may remain unused) + + SP-48 arg word 3 + SP-44 arg word 2 + SP-40 arg word 1 + SP-36 arg word 0 + + Frame Marker + + SP-32 External Data Pointer (DP) + SP-28 External sr4 + SP-24 External/stub RP (RP') + SP-20 Current RP + SP-16 Static Link + SP-12 Clean up + SP-8 Calling Stub RP (RP'') + SP-4 Previous SP + + Top of Frame + + SP-0 Stack Pointer (points to next available address) + +*/ + +/* This function saves registers as follows. Registers marked with ' are + this function's registers (as opposed to the previous function's). + If a frame_pointer isn't needed, r4 is saved as a general register; + the space for the frame pointer is still allocated, though, to keep + things simple. + + + Top of Frame + + SP (FP') Previous FP + SP + 4 Alignment filler (sigh) + SP + 8 Space for locals reserved here. + . + . + . + SP + n All call saved register used. + . + . + . + SP + o All call saved fp registers used. + . + . + . + SP + p (SP') points to next available address. + +*/ + +/* Global variables set by output_function_prologue(). */ +/* Size of frame. Need to know this to emit return insns from + leaf procedures. */ +static HOST_WIDE_INT actual_fsize, local_fsize; +static int save_fregs; + +/* Emit RTL to store REG at the memory location specified by BASE+DISP. + Handle case where DISP > 8k by using the add_high_const patterns. + + Note in DISP > 8k case, we will leave the high part of the address + in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ + +static void +store_reg (int reg, HOST_WIDE_INT disp, int base) +{ + rtx insn, dest, src, basereg; + + src = gen_rtx_REG (word_mode, reg); + basereg = gen_rtx_REG (Pmode, base); + if (VAL_14_BITS_P (disp)) + { + dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp)); + insn = emit_move_insn (dest, src); + } + else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) + { + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, delta); + insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); + if (DO_FRAME_NOTES) + { + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, tmpreg, + gen_rtx_PLUS (Pmode, basereg, delta))); + RTX_FRAME_RELATED_P (insn) = 1; + } + dest = gen_rtx_MEM (word_mode, tmpreg); + insn = emit_move_insn (dest, src); + } + else + { + rtx delta = GEN_INT (disp); + rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, high); + dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); + insn = emit_move_insn (dest, src); + if (DO_FRAME_NOTES) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, + gen_rtx_MEM (word_mode, + gen_rtx_PLUS (word_mode, + basereg, + delta)), + src)); + } + + if (DO_FRAME_NOTES) + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Emit RTL to store REG at the memory location specified by BASE and then + add MOD to BASE. MOD must be <= 8k. 
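*/
+
+/* Illustrative sketch (editorial, not part of upstream pa.c): the
+   signed 14-bit displacement test that the assertion below relies on,
+   written as plain C; the real macro here is VAL_14_BITS_P.  */
+#if 0
+static int
+fits_14_bits (long disp)
+{
+  /* A signed 14-bit field covers [-8192, 8191].  */
+  return disp >= -8192 && disp <= 8191;
+}
+#endif
+
+/*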
*/ + +static void +store_reg_modify (int base, int reg, HOST_WIDE_INT mod) +{ + rtx insn, basereg, srcreg, delta; + + gcc_assert (VAL_14_BITS_P (mod)); + + basereg = gen_rtx_REG (Pmode, base); + srcreg = gen_rtx_REG (word_mode, reg); + delta = GEN_INT (mod); + + insn = emit_insn (gen_post_store (basereg, srcreg, delta)); + if (DO_FRAME_NOTES) + { + RTX_FRAME_RELATED_P (insn) = 1; + + /* RTX_FRAME_RELATED_P must be set on each frame related set + in a parallel with more than one element. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1; + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + } +} + +/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case + where DISP > 8k by using the add_high_const patterns. NOTE indicates + whether to add a frame note or not. + + In the DISP > 8k case, we leave the high part of the address in %r1. + There is code in expand_hppa_{prologue,epilogue} that knows about this. */ + +static void +set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note) +{ + rtx insn; + + if (VAL_14_BITS_P (disp)) + { + insn = emit_move_insn (gen_rtx_REG (Pmode, reg), + plus_constant (gen_rtx_REG (Pmode, base), disp)); + } + else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) + { + rtx basereg = gen_rtx_REG (Pmode, base); + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, delta); + insn = emit_move_insn (gen_rtx_REG (Pmode, reg), + gen_rtx_PLUS (Pmode, tmpreg, basereg)); + if (DO_FRAME_NOTES) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, tmpreg, + gen_rtx_PLUS (Pmode, basereg, delta))); + } + else + { + rtx basereg = gen_rtx_REG (Pmode, base); + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, + gen_rtx_PLUS (Pmode, basereg, + gen_rtx_HIGH (Pmode, delta))); + insn = emit_move_insn (gen_rtx_REG (Pmode, reg), + gen_rtx_LO_SUM (Pmode, tmpreg, delta)); + } + + if (DO_FRAME_NOTES && note) + RTX_FRAME_RELATED_P (insn) = 1; +} + +HOST_WIDE_INT +compute_frame_size (HOST_WIDE_INT size, int *fregs_live) +{ + int freg_saved = 0; + int i, j; + + /* The code in hppa_expand_prologue and hppa_expand_epilogue must + be consistent with the rounding and size calculation done here. + Change them at the same time. */ + + /* We do our own stack alignment. First, round the size of the + stack locals up to a word boundary. */ + size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); + + /* Space for previous frame pointer + filler. If any frame is + allocated, we need to add in the STARTING_FRAME_OFFSET. We + waste some space here for the sake of HP compatibility. The + first slot is only used when the frame pointer is needed. */ + if (size || frame_pointer_needed) + size += STARTING_FRAME_OFFSET; + + /* If the current function calls __builtin_eh_return, then we need + to allocate stack space for registers that will hold data for + the exception handler. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i; + + for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) + continue; + size += i * UNITS_PER_WORD; + } + + /* Account for space used by the callee general register saves. */ + for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--) + if (df_regs_ever_live_p (i)) + size += UNITS_PER_WORD; + + /* Account for space used by the callee floating point register saves. 
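+
+     (Example, added for illustration: each live FP save adds 8 bytes
+     in the loop below, so four saved FP registers add 32 bytes, and
+     the save block is then rounded up to an 8-byte boundary.)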
*/ + for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) + if (df_regs_ever_live_p (i) + || (!TARGET_64BIT && df_regs_ever_live_p (i + 1))) + { + freg_saved = 1; + + /* We always save both halves of the FP register, so always + increment the frame size by 8 bytes. */ + size += 8; + } + + /* If any of the floating registers are saved, account for the + alignment needed for the floating point register save block. */ + if (freg_saved) + { + size = (size + 7) & ~7; + if (fregs_live) + *fregs_live = 1; + } + + /* The various ABIs include space for the outgoing parameters in the + size of the current function's stack frame. We don't need to align + for the outgoing arguments as their alignment is set by the final + rounding for the frame as a whole. */ + size += crtl->outgoing_args_size; + + /* Allocate space for the fixed frame marker. This space must be + allocated for any function that makes calls or allocates + stack space. */ + if (!current_function_is_leaf || size) + size += TARGET_64BIT ? 48 : 32; + + /* Finally, round to the preferred stack boundary. */ + return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1) + & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)); +} + +/* Generate the assembly code for function entry. FILE is a stdio + stream to output the code to. SIZE is an int: how many units of + temporary storage to allocate. + + Refer to the array `regs_ever_live' to determine which registers to + save; `regs_ever_live[I]' is nonzero if register number I is ever + used in the function. This function is responsible for knowing + which registers should not be saved even if used. */ + +/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block + of memory. If any fpu reg is used in the function, we allocate + such a block here, at the bottom of the frame, just in case it's needed. + + If this function is a leaf procedure, then we may choose not + to do a "save" insn. The decision about whether or not + to do this is made in regclass.c. */ + +static void +pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + /* The function's label and associated .PROC must never be + separated and must be output *after* any profiling declarations + to avoid changing spaces/subspaces within a procedure. */ + ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); + fputs ("\t.PROC\n", file); + + /* hppa_expand_prologue does the dirty work now. We just need + to output the assembler directives which denote the start + of a function. */ + fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize); + if (current_function_is_leaf) + fputs (",NO_CALLS", file); + else + fputs (",CALLS", file); + if (rp_saved) + fputs (",SAVE_RP", file); + + /* The SAVE_SP flag is used to indicate that register %r3 is stored + at the beginning of the frame and that it is used as the frame + pointer for the frame. We do this because our current frame + layout doesn't conform to that specified in the HP runtime + documentation and we need a way to indicate to programs such as + GDB where %r3 is saved. The SAVE_SP flag was chosen because it + isn't used by HP compilers but is supported by the assembler. + However, SAVE_SP is supposed to indicate that the previous stack + pointer has been saved in the frame marker. */ + if (frame_pointer_needed) + fputs (",SAVE_SP", file); + + /* Pass on information about the number of callee register saves + performed in the prologue. 
+ + The compiler is supposed to pass the highest register number + saved, the assembler then has to adjust that number before + entering it into the unwind descriptor (to account for any + caller saved registers with lower register numbers than the + first callee saved register). */ + if (gr_saved) + fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); + + if (fr_saved) + fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); + + fputs ("\n\t.ENTRY\n", file); + + remove_useless_addtr_insns (0); +} + +void +hppa_expand_prologue (void) +{ + int merge_sp_adjust_with_store = 0; + HOST_WIDE_INT size = get_frame_size (); + HOST_WIDE_INT offset; + int i; + rtx insn, tmpreg; + + gr_saved = 0; + fr_saved = 0; + save_fregs = 0; + + /* Compute total size for frame pointer, filler, locals and rounding to + the next word boundary. Similar code appears in compute_frame_size + and must be changed in tandem with this code. */ + local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); + if (local_fsize || frame_pointer_needed) + local_fsize += STARTING_FRAME_OFFSET; + + actual_fsize = compute_frame_size (size, &save_fregs); + if (flag_stack_usage) + current_function_static_stack_size = actual_fsize; + + /* Compute a few things we will use often. */ + tmpreg = gen_rtx_REG (word_mode, 1); + + /* Save RP first. The calling conventions manual states RP will + always be stored into the caller's frame at sp - 20 or sp - 16 + depending on which ABI is in use. */ + if (df_regs_ever_live_p (2) || crtl->calls_eh_return) + { + store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM); + rp_saved = true; + } + else + rp_saved = false; + + /* Allocate the local frame and set up the frame pointer if needed. */ + if (actual_fsize != 0) + { + if (frame_pointer_needed) + { + /* Copy the old frame pointer temporarily into %r1. Set up the + new stack pointer, then store away the saved old frame pointer + into the stack at sp and at the same time update the stack + pointer by actual_fsize bytes. Two versions, first + handles small (<8k) frames. The second handles large (>=8k) + frames. */ + insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx); + if (DO_FRAME_NOTES) + RTX_FRAME_RELATED_P (insn) = 1; + + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + if (DO_FRAME_NOTES) + RTX_FRAME_RELATED_P (insn) = 1; + + if (VAL_14_BITS_P (actual_fsize)) + store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize); + else + { + /* It is incorrect to store the saved frame pointer at *sp, + then increment sp (writes beyond the current stack boundary). + + So instead use stwm to store at *sp and post-increment the + stack pointer as an atomic operation. Then increment sp to + finish allocating the new frame. */ + HOST_WIDE_INT adjust1 = 8192 - 64; + HOST_WIDE_INT adjust2 = actual_fsize - adjust1; + + store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1); + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + adjust2, 1); + } + + /* We set SAVE_SP in frames that need a frame pointer. Thus, + we need to store the previous stack pointer (frame pointer) + into the frame marker on targets that use the HP unwind + library. This allows the HP unwind library to be used to + unwind GCC frames. However, we are not fully compatible + with the HP library because our frame layout differs from + that specified in the HP runtime specification. + + We don't want a frame note on this instruction as the frame + marker moves during dynamic stack allocation. 
+ + This instruction also serves as a blockage to prevent + register spills from being scheduled before the stack + pointer is raised. This is necessary as we store + registers using the frame pointer as a base register, + and the frame pointer is set before sp is raised. */ + if (TARGET_HPUX_UNWIND_LIBRARY) + { + rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, + GEN_INT (TARGET_64BIT ? -8 : -4)); + + emit_move_insn (gen_rtx_MEM (word_mode, addr), + hard_frame_pointer_rtx); + } + else + emit_insn (gen_blockage ()); + } + /* no frame pointer needed. */ + else + { + /* In some cases we can perform the first callee register save + and allocating the stack frame at the same time. If so, just + make a note of it and defer allocating the frame until saving + the callee registers. */ + if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0) + merge_sp_adjust_with_store = 1; + /* Can not optimize. Adjust the stack frame by actual_fsize + bytes. */ + else + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + actual_fsize, 1); + } + } + + /* Normal register save. + + Do not save the frame pointer in the frame_pointer_needed case. It + was done earlier. */ + if (frame_pointer_needed) + { + offset = local_fsize; + + /* Saving the EH return data registers in the frame is the simplest + way to get the frame unwind information emitted. We put them + just before the general registers. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 4; i--) + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + store_reg (i, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + gr_saved++; + } + /* Account for %r3 which is saved in a special place. */ + gr_saved++; + } + /* No frame pointer needed. */ + else + { + offset = local_fsize - actual_fsize; + + /* Saving the EH return data registers in the frame is the simplest + way to get the frame unwind information emitted. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + /* If merge_sp_adjust_with_store is nonzero, then we can + optimize the first save. */ + if (merge_sp_adjust_with_store) + { + store_reg_modify (STACK_POINTER_REGNUM, regno, -offset); + merge_sp_adjust_with_store = 0; + } + else + store_reg (regno, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 3; i--) + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + /* If merge_sp_adjust_with_store is nonzero, then we can + optimize the first GR save. */ + if (merge_sp_adjust_with_store) + { + store_reg_modify (STACK_POINTER_REGNUM, i, -offset); + merge_sp_adjust_with_store = 0; + } + else + store_reg (i, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + gr_saved++; + } + + /* If we wanted to merge the SP adjustment with a GR save, but we never + did any GR saves, then just emit the adjustment here. */ + if (merge_sp_adjust_with_store) + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + actual_fsize, 1); + } + + /* The hppa calling conventions say that %r19, the pic offset + register, is saved at sp - 32 (in this function's frame) + when generating PIC code. 
FIXME: What is the correct thing + to do for functions which make no calls and allocate no + frame? Do we need to allocate a frame, or can we just omit + the save? For now we'll just omit the save. + + We don't want a note on this insn as the frame marker can + move if there is a dynamic stack allocation. */ + if (flag_pic && actual_fsize != 0 && !TARGET_64BIT) + { + rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); + + emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); + + } + + /* Align pointer properly (doubleword boundary). */ + offset = (offset + 7) & ~7; + + /* Floating point register store. */ + if (save_fregs) + { + rtx base; + + /* First get the frame or stack pointer to the start of the FP register + save area. */ + if (frame_pointer_needed) + { + set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); + base = hard_frame_pointer_rtx; + } + else + { + set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); + base = stack_pointer_rtx; + } + + /* Now actually save the FP registers. */ + for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) + { + if (df_regs_ever_live_p (i) + || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) + { + rtx addr, insn, reg; + addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg)); + reg = gen_rtx_REG (DFmode, i); + insn = emit_move_insn (addr, reg); + if (DO_FRAME_NOTES) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (TARGET_64BIT) + { + rtx mem = gen_rtx_MEM (DFmode, + plus_constant (base, offset)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, mem, reg)); + } + else + { + rtx meml = gen_rtx_MEM (SFmode, + plus_constant (base, offset)); + rtx memr = gen_rtx_MEM (SFmode, + plus_constant (base, offset + 4)); + rtx regl = gen_rtx_REG (SFmode, i); + rtx regr = gen_rtx_REG (SFmode, i + 1); + rtx setl = gen_rtx_SET (VOIDmode, meml, regl); + rtx setr = gen_rtx_SET (VOIDmode, memr, regr); + rtvec vec; + + RTX_FRAME_RELATED_P (setl) = 1; + RTX_FRAME_RELATED_P (setr) = 1; + vec = gen_rtvec (2, setl, setr); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SEQUENCE (VOIDmode, vec)); + } + } + offset += GET_MODE_SIZE (DFmode); + fr_saved++; + } + } + } +} + +/* Emit RTL to load REG from the memory location specified by BASE+DISP. + Handle case where DISP > 8k by using the add_high_const patterns. */ + +static void +load_reg (int reg, HOST_WIDE_INT disp, int base) +{ + rtx dest = gen_rtx_REG (word_mode, reg); + rtx basereg = gen_rtx_REG (Pmode, base); + rtx src; + + if (VAL_14_BITS_P (disp)) + src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp)); + else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) + { + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, delta); + if (TARGET_DISABLE_INDEXING) + { + emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); + src = gen_rtx_MEM (word_mode, tmpreg); + } + else + src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg)); + } + else + { + rtx delta = GEN_INT (disp); + rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, high); + src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); + } + + emit_move_insn (dest, src); +} + +/* Update the total code bytes output to the text section. 
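+   (An illustrative note added here, not in the original source: the
+   counter saturates rather than wraps.  E.g. if total_code_bytes is
+   0xfffffff0 and NBYTES is 0x20, the wrapped sum is smaller than the
+   old total, so the count is pinned at UINT_MAX.)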
*/
+
+static void
+update_total_code_bytes (unsigned int nbytes)
+{
+  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
+      && !IN_NAMED_SECTION_P (cfun->decl))
+    {
+      unsigned int old_total = total_code_bytes;
+
+      total_code_bytes += nbytes;
+
+      /* Be prepared to handle overflows.  */
+      if (old_total > total_code_bytes)
+        total_code_bytes = UINT_MAX;
+    }
+}
+
+/* This function generates the assembly code for function exit.
+   Args are as for output_function_prologue ().
+
+   The function epilogue should not depend on the current stack
+   pointer!  It should use the frame pointer only.  This is mandatory
+   because of alloca; we also take advantage of it to omit stack
+   adjustments before returning.  */
+
+static void
+pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+  rtx insn = get_last_insn ();
+
+  last_address = 0;
+
+  /* hppa_expand_epilogue does the dirty work now.  We just need
+     to output the assembler directives which denote the end
+     of a function.
+
+     To make debuggers happy, emit a nop if the epilogue was completely
+     eliminated due to a volatile call as the last insn in the
+     current function.  That way the return address (in %r2) will
+     always point to a valid instruction in the current function.  */
+
+  /* Get the last real insn.  */
+  if (GET_CODE (insn) == NOTE)
+    insn = prev_real_insn (insn);
+
+  /* If it is a sequence, then look inside.  */
+  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+    insn = XVECEXP (PATTERN (insn), 0, 0);
+
+  /* If insn is a CALL_INSN, then it must be a call to a volatile
+     function (otherwise there would be epilogue insns).  */
+  if (insn && GET_CODE (insn) == CALL_INSN)
+    {
+      fputs ("\tnop\n", file);
+      last_address += 4;
+    }
+
+  fputs ("\t.EXIT\n\t.PROCEND\n", file);
+
+  if (TARGET_SOM && TARGET_GAS)
+    {
+      /* We're done with this subspace except possibly for some additional
+         debug information.  Forget that we are in this subspace to ensure
+         that the next function is output in its own subspace.  */
+      in_section = NULL;
+      cfun->machine->in_nsubspa = 2;
+    }
+
+  if (INSN_ADDRESSES_SET_P ())
+    {
+      insn = get_last_nonnote_insn ();
+      last_address += INSN_ADDRESSES (INSN_UID (insn));
+      if (INSN_P (insn))
+        last_address += insn_default_length (insn);
+      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
+                      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
+    }
+  else
+    last_address = UINT_MAX;
+
+  /* Finally, update the total number of code bytes output so far.  */
+  update_total_code_bytes (last_address);
+}
+
+void
+hppa_expand_epilogue (void)
+{
+  rtx tmpreg;
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT ret_off = 0;
+  int i;
+  int merge_sp_adjust_with_load = 0;
+
+  /* We will use this often.  */
+  tmpreg = gen_rtx_REG (word_mode, 1);
+
+  /* Try to restore RP early to avoid load/use interlocks when
+     RP gets used in the return (bv) instruction.  This appears to still
+     be necessary even when we schedule the prologue and epilogue.  */
+  if (rp_saved)
+    {
+      ret_off = TARGET_64BIT ? -16 : -20;
+      if (frame_pointer_needed)
+        {
+          load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
+          ret_off = 0;
+        }
+      else
+        {
+          /* No frame pointer, and stack is smaller than 8k.  */
+          if (VAL_14_BITS_P (ret_off - actual_fsize))
+            {
+              load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
+              ret_off = 0;
+            }
+        }
+    }
+
+  /* General register restores.  */
+  if (frame_pointer_needed)
+    {
+      offset = local_fsize;
+
+      /* If the current function calls __builtin_eh_return, then we need
+         to restore the saved EH data registers.
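+         (Note added for illustration, not in the original source: the
+         registers are reloaded in the same order and from the same
+         word-sized slots the prologue used, starting at OFFSET.)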
*/ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 4; i--) + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + load_reg (i, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + else + { + offset = local_fsize - actual_fsize; + + /* If the current function calls __builtin_eh_return, then we need + to restore the saved EH data registers. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + /* Only for the first load. + merge_sp_adjust_with_load holds the register load + with which we will merge the sp adjustment. */ + if (merge_sp_adjust_with_load == 0 + && local_fsize == 0 + && VAL_14_BITS_P (-actual_fsize)) + merge_sp_adjust_with_load = regno; + else + load_reg (regno, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 3; i--) + { + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + /* Only for the first load. + merge_sp_adjust_with_load holds the register load + with which we will merge the sp adjustment. */ + if (merge_sp_adjust_with_load == 0 + && local_fsize == 0 + && VAL_14_BITS_P (-actual_fsize)) + merge_sp_adjust_with_load = i; + else + load_reg (i, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + } + + /* Align pointer properly (doubleword boundary). */ + offset = (offset + 7) & ~7; + + /* FP register restores. */ + if (save_fregs) + { + /* Adjust the register to index off of. */ + if (frame_pointer_needed) + set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); + else + set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); + + /* Actually do the restores now. */ + for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) + if (df_regs_ever_live_p (i) + || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) + { + rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg)); + rtx dest = gen_rtx_REG (DFmode, i); + emit_move_insn (dest, src); + } + } + + /* Emit a blockage insn here to keep these insns from being moved to + an earlier spot in the epilogue, or into the main instruction stream. + + This is necessary as we must not cut the stack back before all the + restores are finished. */ + emit_insn (gen_blockage ()); + + /* Reset stack pointer (and possibly frame pointer). The stack + pointer is initially set to fp + 64 to avoid a race condition. */ + if (frame_pointer_needed) + { + rtx delta = GEN_INT (-64); + + set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0); + emit_insn (gen_pre_load (hard_frame_pointer_rtx, + stack_pointer_rtx, delta)); + } + /* If we were deferring a callee register restore, do it now. */ + else if (merge_sp_adjust_with_load) + { + rtx delta = GEN_INT (-actual_fsize); + rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load); + + emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)); + } + else if (actual_fsize != 0) + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + - actual_fsize, 0); + + /* If we haven't restored %r2 yet (no frame pointer, and a stack + frame greater than 8k), do so now. 
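+     (Explanatory note added here, not in the original source: at this
+     point the frame has already been cut back, so the save slot at
+     sp - 20 (sp - 16 for the 64-bit runtime) is again within a 14-bit
+     displacement of the stack pointer.)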
*/ + if (ret_off != 0) + load_reg (2, ret_off, STACK_POINTER_REGNUM); + + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + rtx sa = EH_RETURN_STACKADJ_RTX; + + emit_insn (gen_blockage ()); + emit_insn (TARGET_64BIT + ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa) + : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa)); + } +} + +bool +pa_can_use_return_insn (void) +{ + if (!reload_completed) + return false; + + if (frame_pointer_needed) + return false; + + if (df_regs_ever_live_p (2)) + return false; + + if (crtl->profile) + return false; + + return compute_frame_size (get_frame_size (), 0) == 0; +} + +rtx +hppa_pic_save_rtx (void) +{ + return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM); +} + +#ifndef NO_DEFERRED_PROFILE_COUNTERS +#define NO_DEFERRED_PROFILE_COUNTERS 0 +#endif + + +/* Vector of funcdef numbers. */ +static VEC(int,heap) *funcdef_nos; + +/* Output deferred profile counters. */ +static void +output_deferred_profile_counters (void) +{ + unsigned int i; + int align, n; + + if (VEC_empty (int, funcdef_nos)) + return; + + switch_to_section (data_section); + align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); + ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); + + for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++) + { + targetm.asm_out.internal_label (asm_out_file, "LP", n); + assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); + } + + VEC_free (int, heap, funcdef_nos); +} + +void +hppa_profile_hook (int label_no) +{ + /* We use SImode for the address of the function in both 32 and + 64-bit code to avoid having to provide DImode versions of the + lcla2 and load_offset_label_address insn patterns. */ + rtx reg = gen_reg_rtx (SImode); + rtx label_rtx = gen_label_rtx (); + rtx begin_label_rtx, call_insn; + char begin_label_name[16]; + + ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, + label_no); + begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); + + if (TARGET_64BIT) + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + + emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); + + /* The address of the function is loaded into %r25 with an instruction- + relative sequence that avoids the use of relocations. The sequence + is split so that the load_offset_label_address instruction can + occupy the delay slot of the call to _mcount. */ + if (TARGET_PA_20) + emit_insn (gen_lcla2 (reg, label_rtx)); + else + emit_insn (gen_lcla1 (reg, label_rtx)); + + emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), + reg, begin_label_rtx, label_rtx)); + +#if !NO_DEFERRED_PROFILE_COUNTERS + { + rtx count_label_rtx, addr, r24; + char count_label_name[16]; + + VEC_safe_push (int, heap, funcdef_nos, label_no); + ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); + count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name)); + + addr = force_reg (Pmode, count_label_rtx); + r24 = gen_rtx_REG (Pmode, 24); + emit_move_insn (r24, addr); + + call_insn = + emit_call_insn (gen_call (gen_rtx_MEM (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + "_mcount")), + GEN_INT (TARGET_64BIT ? 24 : 12))); + + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); + } +#else + + call_insn = + emit_call_insn (gen_call (gen_rtx_MEM (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + "_mcount")), + GEN_INT (TARGET_64BIT ? 
16 : 8)));
+
+#endif
+
+  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
+  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
+
+  /* Indicate the _mcount call cannot throw, nor will it execute a
+     non-local goto.  */
+  make_reg_eh_region_note_nothrow_nononlocal (call_insn);
+}
+
+/* Fetch the return address for the frame COUNT steps up from
+   the current frame, after the prologue.  FRAMEADDR is the
+   frame pointer of the COUNT frame.
+
+   We want to ignore any export stub remnants here.  To handle this,
+   we examine the code at the return address, and if it is an export
+   stub, we return a memory rtx for the stub return address stored
+   at frame-24.
+
+   The value returned is used in two different ways:
+
+        1. To find a function's caller.
+
+        2. To change the return address for a function.
+
+   This function handles most instances of case 1; however, it will
+   fail if there are two levels of stubs to execute on the return
+   path.  The only way I believe that can happen is if the return value
+   needs a parameter relocation, which never happens for C code.
+
+   This function handles most instances of case 2; however, it will
+   fail if we did not originally have stub code on the return path
+   but will need stub code on the new return path.  This can happen if
+   the caller & callee are both in the main program, but the new
+   return location is in a shared library.  */
+
+rtx
+return_addr_rtx (int count, rtx frameaddr)
+{
+  rtx label;
+  rtx rp;
+  rtx saved_rp;
+  rtx ins;
+
+  /* The instruction stream at the return address of a PA1.X export stub is:
+
+     0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
+     0x004010a1 | stub+12:  ldsid (sr0,rp),r1
+     0x00011820 | stub+16:  mtsp r1,sr0
+     0xe0400002 | stub+20:  be,n 0(sr0,rp)
+
+     0xe0400002 must be specified as -532676606 so that it won't be
+     rejected as an invalid immediate operand on 64-bit hosts.
+
+     The instruction stream at the return address of a PA2.0 export stub is:
+
+     0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
+     0xe840d002 | stub+12:  bve,n (rp)
+  */
+
+  HOST_WIDE_INT insns[4];
+  int i, len;
+
+  if (count != 0)
+    return NULL_RTX;
+
+  rp = get_hard_reg_initial_val (Pmode, 2);
+
+  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
+    return rp;
+
+  /* If there is no export stub then just use the value saved from
+     the return pointer register.  */
+
+  saved_rp = gen_reg_rtx (Pmode);
+  emit_move_insn (saved_rp, rp);
+
+  /* Get pointer to the instruction stream.  We have to mask out the
+     privilege level from the two low order bits of the return address
+     pointer here so that ins will point to the start of the first
+     instruction that would have been executed if we returned.  */
+  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
+  label = gen_label_rtx ();
+
+  if (TARGET_PA_20)
+    {
+      insns[0] = 0x4bc23fd1;
+      insns[1] = -398405630;
+      len = 2;
+    }
+  else
+    {
+      insns[0] = 0x4bc23fd1;
+      insns[1] = 0x004010a1;
+      insns[2] = 0x00011820;
+      insns[3] = -532676606;
+      len = 4;
+    }
+
+  /* Check the instruction stream at the normal return address for the
+     export stub.  If it is an export stub, then our return address is
+     really in -24[frameaddr].  */
+
+  for (i = 0; i < len; i++)
+    {
+      rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
+      rtx op1 = GEN_INT (insns[i]);
+      emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
+    }
+
+  /* Here we know that our return address points to an export
+     stub.  We don't want to return the address of the export stub,
+     but rather the return address of the export stub.
That return + address is stored at -24[frameaddr]. */ + + emit_move_insn (saved_rp, + gen_rtx_MEM (Pmode, + memory_address (Pmode, + plus_constant (frameaddr, + -24)))); + + emit_label (label); + + return saved_rp; +} + +void +emit_bcond_fp (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[0]); + rtx operand0 = operands[1]; + rtx operand1 = operands[2]; + rtx label = operands[3]; + + emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), + gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1))); + + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (NE, + VOIDmode, + gen_rtx_REG (CCFPmode, 0), + const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx))); + +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type attr_type; + + /* Don't adjust costs for a pa8000 chip, also do not adjust any + true dependencies as they are described with bypasses now. */ + if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0) + return cost; + + if (! recog_memoized (insn)) + return 0; + + attr_type = get_attr_type (insn); + + switch (REG_NOTE_KIND (link)) + { + case REG_DEP_ANTI: + /* Anti dependency; DEP_INSN reads a register that INSN writes some + cycles later. */ + + if (attr_type == TYPE_FPLOAD) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPALU: + case TYPE_FPMULSGL: + case TYPE_FPMULDBL: + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* A fpload can't be issued until one cycle before a + preceding arithmetic operation has finished if + the target of the fpload is any of the sources + (or destination) of the arithmetic operation. */ + return insn_default_latency (dep_insn) - 1; + + default: + return 0; + } + } + } + else if (attr_type == TYPE_FPALU) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* An ALU flop can't be issued until two cycles before a + preceding divide or sqrt operation has finished if + the target of the ALU flop is any of the sources + (or destination) of the divide or sqrt operation. */ + return insn_default_latency (dep_insn) - 2; + + default: + return 0; + } + } + } + + /* For other anti dependencies, the cost is 0. */ + return 0; + + case REG_DEP_OUTPUT: + /* Output dependency; DEP_INSN writes a register that INSN writes some + cycles later. 
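+         (Illustrative case, not in the original source: an fmpy,dbl
+         whose destination is %fr4 followed by an fldd that also loads
+         %fr4 forms an output dependency, and the load is held back
+         until one cycle before the multiply finishes.)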
*/ + if (attr_type == TYPE_FPLOAD) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPALU: + case TYPE_FPMULSGL: + case TYPE_FPMULDBL: + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* A fpload can't be issued until one cycle before a + preceding arithmetic operation has finished if + the target of the fpload is the destination of the + arithmetic operation. + + Exception: For PA7100LC, PA7200 and PA7300, the cost + is 3 cycles, unless they bundle together. We also + pay the penalty if the second insn is a fpload. */ + return insn_default_latency (dep_insn) - 1; + + default: + return 0; + } + } + } + else if (attr_type == TYPE_FPALU) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* An ALU flop can't be issued until two cycles before a + preceding divide or sqrt operation has finished if + the target of the ALU flop is also the target of + the divide or sqrt operation. */ + return insn_default_latency (dep_insn) - 2; + + default: + return 0; + } + } + } + + /* For other output dependencies, the cost is 0. */ + return 0; + + default: + gcc_unreachable (); + } +} + +/* Adjust scheduling priorities. We use this to try and keep addil + and the next use of %r1 close together. */ +static int +pa_adjust_priority (rtx insn, int priority) +{ + rtx set = single_set (insn); + rtx src, dest; + if (set) + { + src = SET_SRC (set); + dest = SET_DEST (set); + if (GET_CODE (src) == LO_SUM + && symbolic_operand (XEXP (src, 1), VOIDmode) + && ! read_only_operand (XEXP (src, 1), VOIDmode)) + priority >>= 3; + + else if (GET_CODE (src) == MEM + && GET_CODE (XEXP (src, 0)) == LO_SUM + && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode) + && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode)) + priority >>= 1; + + else if (GET_CODE (dest) == MEM + && GET_CODE (XEXP (dest, 0)) == LO_SUM + && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode) + && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)) + priority >>= 3; + } + return priority; +} + +/* The 700 can only issue a single insn at a time. + The 7XXX processors can issue two insns at a time. + The 8000 can issue 4 insns at a time. 
*/
+static int
+pa_issue_rate (void)
+{
+  switch (pa_cpu)
+    {
+    case PROCESSOR_700:		return 1;
+    case PROCESSOR_7100:	return 2;
+    case PROCESSOR_7100LC:	return 2;
+    case PROCESSOR_7200:	return 2;
+    case PROCESSOR_7300:	return 2;
+    case PROCESSOR_8000:	return 4;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+
+
+/* Return any length plus adjustment needed by INSN which already has
+   its length computed as LENGTH.  Return LENGTH if no adjustment is
+   necessary.
+
+   Also compute the length of an inline block move here as it is too
+   complicated to express as a length attribute in pa.md.  */
+int
+pa_adjust_insn_length (rtx insn, int length)
+{
+  rtx pat = PATTERN (insn);
+
+  /* If length is negative or undefined, provide initial length.  */
+  if ((unsigned int) length >= INT_MAX)
+    {
+      if (GET_CODE (pat) == SEQUENCE)
+        insn = XVECEXP (pat, 0, 0);
+
+      switch (get_attr_type (insn))
+        {
+        case TYPE_MILLI:
+          length = attr_length_millicode_call (insn);
+          break;
+        case TYPE_CALL:
+          length = attr_length_call (insn, 0);
+          break;
+        case TYPE_SIBCALL:
+          length = attr_length_call (insn, 1);
+          break;
+        case TYPE_DYNCALL:
+          length = attr_length_indirect_call (insn);
+          break;
+        case TYPE_SH_FUNC_ADRS:
+          length = attr_length_millicode_call (insn) + 20;
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+
+  /* Jumps inside switch tables which have unfilled delay slots need
+     adjustment.  */
+  if (GET_CODE (insn) == JUMP_INSN
+      && GET_CODE (pat) == PARALLEL
+      && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
+    length += 4;
+  /* Block move pattern.  */
+  else if (GET_CODE (insn) == INSN
+           && GET_CODE (pat) == PARALLEL
+           && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
+           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
+           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
+           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
+    length += compute_movmem_length (insn) - 4;
+  /* Block clear pattern.  */
+  else if (GET_CODE (insn) == INSN
+           && GET_CODE (pat) == PARALLEL
+           && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
+           && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
+           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
+    length += compute_clrmem_length (insn) - 4;
+  /* Conditional branch with an unfilled delay slot.  */
+  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
+    {
+      /* Adjust a short backwards conditional with an unfilled delay slot.  */
+      if (GET_CODE (pat) == SET
+          && length == 4
+          && JUMP_LABEL (insn) != NULL_RTX
+          && ! forward_branch_p (insn))
+        length += 4;
+      else if (GET_CODE (pat) == PARALLEL
+               && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
+               && length == 4)
+        length += 4;
+      /* Adjust dbra insn with short backwards conditional branch with
+         unfilled delay slot -- only for case where counter is in a
+         general register.  */
+      else if (GET_CODE (pat) == PARALLEL
+               && GET_CODE (XVECEXP (pat, 0, 1)) == SET
+               && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
+               && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
+               && length == 4
+               && ! forward_branch_p (insn))
+        length += 4;
+    }
+  return length;
+}
+
+/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
+
+static bool
+pa_print_operand_punct_valid_p (unsigned char code)
+{
+  if (code == '@'
+      || code == '#'
+      || code == '*'
+      || code == '^')
+    return true;
+
+  return false;
+}
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +void +print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case '#': + /* Output a 'nop' if there's nothing for the delay slot. */ + if (dbr_sequence_length () == 0) + fputs ("\n\tnop", file); + return; + case '*': + /* Output a nullification completer if there's nothing for the */ + /* delay slot or nullification is requested. */ + if (dbr_sequence_length () == 0 || + (final_sequence && + INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) + fputs (",n", file); + return; + case 'R': + /* Print out the second register name of a register pair. + I.e., R (6) => 7. */ + fputs (reg_names[REGNO (x) + 1], file); + return; + case 'r': + /* A register or zero. */ + if (x == const0_rtx + || (x == CONST0_RTX (DFmode)) + || (x == CONST0_RTX (SFmode))) + { + fputs ("%r0", file); + return; + } + else + break; + case 'f': + /* A register or zero (floating point). */ + if (x == const0_rtx + || (x == CONST0_RTX (DFmode)) + || (x == CONST0_RTX (SFmode))) + { + fputs ("%fr0", file); + return; + } + else + break; + case 'A': + { + rtx xoperands[2]; + + xoperands[0] = XEXP (XEXP (x, 0), 0); + xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0); + output_global_address (file, xoperands[1], 0); + fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]); + return; + } + + case 'C': /* Plain (C)ondition */ + case 'X': + switch (GET_CODE (x)) + { + case EQ: + fputs ("=", file); break; + case NE: + fputs ("<>", file); break; + case GT: + fputs (">", file); break; + case GE: + fputs (">=", file); break; + case GEU: + fputs (">>=", file); break; + case GTU: + fputs (">>", file); break; + case LT: + fputs ("<", file); break; + case LE: + fputs ("<=", file); break; + case LEU: + fputs ("<<=", file); break; + case LTU: + fputs ("<<", file); break; + default: + gcc_unreachable (); + } + return; + case 'N': /* Condition, (N)egated */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("<>", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs ("<=", file); break; + case GE: + fputs ("<", file); break; + case GEU: + fputs ("<<", file); break; + case GTU: + fputs ("<<=", file); break; + case LT: + fputs (">=", file); break; + case LE: + fputs (">", file); break; + case LEU: + fputs (">>", file); break; + case LTU: + fputs (">>=", file); break; + default: + gcc_unreachable (); + } + return; + /* For floating point comparisons. Note that the output + predicates are the complement of the desired mode. The + conditions for GT, GE, LT, LE and LTGT cause an invalid + operation exception if the result is unordered and this + exception is enabled in the floating-point status register. */ + case 'Y': + switch (GET_CODE (x)) + { + case EQ: + fputs ("!=", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs ("!>", file); break; + case GE: + fputs ("!>=", file); break; + case LT: + fputs ("!<", file); break; + case LE: + fputs ("!<=", file); break; + case LTGT: + fputs ("!<>", file); break; + case UNLE: + fputs ("!?<=", file); break; + case UNLT: + fputs ("!?<", file); break; + case UNGE: + fputs ("!?>=", file); break; + case UNGT: + fputs ("!?>", file); break; + case UNEQ: + fputs ("!?=", file); break; + case UNORDERED: + fputs ("!?", file); break; + case ORDERED: + fputs ("?", file); break; + default: + gcc_unreachable (); + } + return; + case 'S': /* Condition, operands are (S)wapped. 
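+                    (An explanatory note added here, not in the
+                    original source: for example, GT prints `<', since
+                    `a > b' reads as `b < a' once the operands are
+                    exchanged.)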
*/ + switch (GET_CODE (x)) + { + case EQ: + fputs ("=", file); break; + case NE: + fputs ("<>", file); break; + case GT: + fputs ("<", file); break; + case GE: + fputs ("<=", file); break; + case GEU: + fputs ("<<=", file); break; + case GTU: + fputs ("<<", file); break; + case LT: + fputs (">", file); break; + case LE: + fputs (">=", file); break; + case LEU: + fputs (">>=", file); break; + case LTU: + fputs (">>", file); break; + default: + gcc_unreachable (); + } + return; + case 'B': /* Condition, (B)oth swapped and negate. */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("<>", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs (">=", file); break; + case GE: + fputs (">", file); break; + case GEU: + fputs (">>", file); break; + case GTU: + fputs (">>=", file); break; + case LT: + fputs ("<=", file); break; + case LE: + fputs ("<", file); break; + case LEU: + fputs ("<<", file); break; + case LTU: + fputs ("<<=", file); break; + default: + gcc_unreachable (); + } + return; + case 'k': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); + return; + case 'Q': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63)); + return; + case 'L': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31)); + return; + case 'O': + gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0); + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + case 'p': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63)); + return; + case 'P': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31)); + return; + case 'I': + if (GET_CODE (x) == CONST_INT) + fputs ("i", file); + return; + case 'M': + case 'F': + switch (GET_CODE (XEXP (x, 0))) + { + case PRE_DEC: + case PRE_INC: + if (ASSEMBLER_DIALECT == 0) + fputs ("s,mb", file); + else + fputs (",mb", file); + break; + case POST_DEC: + case POST_INC: + if (ASSEMBLER_DIALECT == 0) + fputs ("s,ma", file); + else + fputs (",ma", file); + break; + case PLUS: + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) + { + if (ASSEMBLER_DIALECT == 0) + fputs ("x", file); + } + else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) + { + if (ASSEMBLER_DIALECT == 0) + fputs ("x,s", file); + else + fputs (",s", file); + } + else if (code == 'F' && ASSEMBLER_DIALECT == 0) + fputs ("s", file); + break; + default: + if (code == 'F' && ASSEMBLER_DIALECT == 0) + fputs ("s", file); + break; + } + return; + case 'G': + output_global_address (file, x, 0); + return; + case 'H': + output_global_address (file, x, 1); + return; + case 0: /* Don't do anything special */ + break; + case 'Z': + { + unsigned op[3]; + compute_zdepwi_operands (INTVAL (x), op); + fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); + return; + } + case 'z': + { + unsigned op[3]; + compute_zdepdi_operands (INTVAL (x), op); + fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); + return; + } + case 'c': + /* We can get here from a .vtable_inherit due to our + CONSTANT_ADDRESS_P rejecting perfectly good constant + addresses. 
*/ + break; + default: + gcc_unreachable (); + } + if (GET_CODE (x) == REG) + { + fputs (reg_names [REGNO (x)], file); + if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4) + { + fputs ("R", file); + return; + } + if (FP_REG_P (x) + && GET_MODE_SIZE (GET_MODE (x)) <= 4 + && (REGNO (x) & 1) == 0) + fputs ("L", file); + } + else if (GET_CODE (x) == MEM) + { + int size = GET_MODE_SIZE (GET_MODE (x)); + rtx base = NULL_RTX; + switch (GET_CODE (XEXP (x, 0))) + { + case PRE_DEC: + case POST_DEC: + base = XEXP (XEXP (x, 0), 0); + fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]); + break; + case PRE_INC: + case POST_INC: + base = XEXP (XEXP (x, 0), 0); + fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]); + break; + case PLUS: + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) + fprintf (file, "%s(%s)", + reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], + reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); + else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) + fprintf (file, "%s(%s)", + reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], + reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) + { + /* Because the REG_POINTER flag can get lost during reload, + GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the + index and base registers in the combined move patterns. */ + rtx base = XEXP (XEXP (x, 0), 1); + rtx index = XEXP (XEXP (x, 0), 0); + + fprintf (file, "%s(%s)", + reg_names [REGNO (index)], reg_names [REGNO (base)]); + } + else + output_address (XEXP (x, 0)); + break; + default: + output_address (XEXP (x, 0)); + break; + } + } + else + output_addr_const (file, x); +} + +/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ + +void +output_global_address (FILE *file, rtx x, int round_constant) +{ + + /* Imagine (high (const (plus ...))). */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode)) + output_addr_const (file, x); + else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) + { + output_addr_const (file, x); + fputs ("-$global$", file); + } + else if (GET_CODE (x) == CONST) + { + const char *sep = ""; + int offset = 0; /* assembler wants -$global$ at end */ + rtx base = NULL_RTX; + + switch (GET_CODE (XEXP (XEXP (x, 0), 0))) + { + case SYMBOL_REF: + base = XEXP (XEXP (x, 0), 0); + output_addr_const (file, base); + break; + case CONST_INT: + offset = INTVAL (XEXP (XEXP (x, 0), 0)); + break; + default: + gcc_unreachable (); + } + + switch (GET_CODE (XEXP (XEXP (x, 0), 1))) + { + case SYMBOL_REF: + base = XEXP (XEXP (x, 0), 1); + output_addr_const (file, base); + break; + case CONST_INT: + offset = INTVAL (XEXP (XEXP (x, 0), 1)); + break; + default: + gcc_unreachable (); + } + + /* How bogus. The compiler is apparently responsible for + rounding the constant if it uses an LR field selector. + + The linker and/or assembler seem a better place since + they have to do this kind of thing already. + + If we fail to do this, HP's optimizing linker may eliminate + an addil, but not update the ldw/stw/ldo instruction that + uses the result of the addil. 
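+
+         As a worked example (added for illustration, not in the
+         original source): an offset of 0x1234 becomes
+         (0x1234 + 0x1000) & ~0x1fff == 0x2000, i.e. the offset is
+         rounded to the nearest multiple of 0x2000.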
*/
+      if (round_constant)
+        offset = ((offset + 0x1000) & ~0x1fff);
+
+      switch (GET_CODE (XEXP (x, 0)))
+        {
+        case PLUS:
+          if (offset < 0)
+            {
+              offset = -offset;
+              sep = "-";
+            }
+          else
+            sep = "+";
+          break;
+
+        case MINUS:
+          gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
+          sep = "-";
+          break;
+
+        default:
+          gcc_unreachable ();
+        }
+
+      if (!read_only_operand (base, VOIDmode) && !flag_pic)
+        fputs ("-$global$", file);
+      if (offset)
+        fprintf (file, "%s%d", sep, offset);
+    }
+  else
+    output_addr_const (file, x);
+}
+
+/* Output boilerplate text to appear at the beginning of the file.
+   There are several possible versions.  */
+#define aputs(x) fputs(x, asm_out_file)
+static inline void
+pa_file_start_level (void)
+{
+  if (TARGET_64BIT)
+    aputs ("\t.LEVEL 2.0w\n");
+  else if (TARGET_PA_20)
+    aputs ("\t.LEVEL 2.0\n");
+  else if (TARGET_PA_11)
+    aputs ("\t.LEVEL 1.1\n");
+  else
+    aputs ("\t.LEVEL 1.0\n");
+}
+
+static inline void
+pa_file_start_space (int sortspace)
+{
+  aputs ("\t.SPACE $PRIVATE$");
+  if (sortspace)
+    aputs (",SORT=16");
+  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
+         "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
+         "\n\t.SPACE $TEXT$");
+  if (sortspace)
+    aputs (",SORT=8");
+  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
+         "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
+}
+
+static inline void
+pa_file_start_file (int want_version)
+{
+  if (write_symbols != NO_DEBUG)
+    {
+      output_file_directive (asm_out_file, main_input_filename);
+      if (want_version)
+        aputs ("\t.version\t\"01.01\"\n");
+    }
+}
+
+static inline void
+pa_file_start_mcount (const char *aswhat)
+{
+  if (profile_flag)
+    fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
+}
+
+static void
+pa_elf_file_start (void)
+{
+  pa_file_start_level ();
+  pa_file_start_mcount ("ENTRY");
+  pa_file_start_file (0);
+}
+
+static void
+pa_som_file_start (void)
+{
+  pa_file_start_level ();
+  pa_file_start_space (0);
+  aputs ("\t.IMPORT $global$,DATA\n"
+         "\t.IMPORT $$dyncall,MILLICODE\n");
+  pa_file_start_mcount ("CODE");
+  pa_file_start_file (0);
+}
+
+static void
+pa_linux_file_start (void)
+{
+  pa_file_start_file (1);
+  pa_file_start_level ();
+  pa_file_start_mcount ("CODE");
+}
+
+static void
+pa_hpux64_gas_file_start (void)
+{
+  pa_file_start_level ();
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
+  if (profile_flag)
+    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
+#endif
+  pa_file_start_file (1);
+}
+
+static void
+pa_hpux64_hpas_file_start (void)
+{
+  pa_file_start_level ();
+  pa_file_start_space (1);
+  pa_file_start_mcount ("CODE");
+  pa_file_start_file (0);
+}
+#undef aputs
+
+/* Search the deferred plabel list for SYMBOL and return its internal
+   label.  If an entry for SYMBOL is not found, a new entry is created.  */
+
+rtx
+get_deferred_plabel (rtx symbol)
+{
+  const char *fname = XSTR (symbol, 0);
+  size_t i;
+
+  /* See if we have already put this function on the list of deferred
+     plabels.  This list is generally small, so a linear search is not
+     too ugly.  If it proves too slow, replace it with something faster.  */
+  for (i = 0; i < n_deferred_plabels; i++)
+    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
+      break;
+
+  /* If the deferred plabel list is empty, or this entry was not found
+     on the list, create a new entry on the list.
*/ + if (deferred_plabels == NULL || i == n_deferred_plabels) + { + tree id; + + if (deferred_plabels == 0) + deferred_plabels = ggc_alloc_deferred_plabel (); + else + deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel, + deferred_plabels, + n_deferred_plabels + 1); + + i = n_deferred_plabels++; + deferred_plabels[i].internal_label = gen_label_rtx (); + deferred_plabels[i].symbol = symbol; + + /* Gross. We have just implicitly taken the address of this + function. Mark it in the same manner as assemble_name. */ + id = maybe_get_identifier (targetm.strip_name_encoding (fname)); + if (id) + mark_referenced (id); + } + + return deferred_plabels[i].internal_label; +} + +static void +output_deferred_plabels (void) +{ + size_t i; + + /* If we have some deferred plabels, then we need to switch into the + data or readonly data section, and align it to a 4 byte boundary + before outputting the deferred plabels. */ + if (n_deferred_plabels) + { + switch_to_section (flag_pic ? data_section : readonly_data_section); + ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + } + + /* Now output the deferred plabels. */ + for (i = 0; i < n_deferred_plabels; i++) + { + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); + assemble_integer (deferred_plabels[i].symbol, + TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1); + } +} + +#if HPUX_LONG_DOUBLE_LIBRARY +/* Initialize optabs to point to HPUX long double emulation routines. */ +static void +pa_hpux_init_libfuncs (void) +{ + set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); + set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); + set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); + set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); + set_optab_libfunc (smin_optab, TFmode, "_U_Qmin"); + set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); + set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt"); + set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); + set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); + + set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); + set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); + set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); + set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); + set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); + set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); + set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord"); + + set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); + set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); + set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); + set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); + + set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT + ? "__U_Qfcnvfxt_quad_to_sgl" + : "_U_Qfcnvfxt_quad_to_sgl"); + set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); + set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl"); + set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl"); + + set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); + set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); + set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad"); + set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad"); +} +#endif + +/* HP's millicode routines mean something special to the assembler. + Keep track of which ones we have used. 
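+   (Worked example added here, not in the original source: the first
+   multiply emitted in a file makes import_milli (mulI) patch the
+   template into ".IMPORT $$mulI,MILLICODE" and output it once; later
+   uses find imported[mulI] set and output nothing.)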
*/
+
+enum millicodes { remI, remU, divI, divU, mulI, end1000 };
+static void import_milli (enum millicodes);
+static char imported[(int) end1000];
+static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
+static const char import_string[] = ".IMPORT $$....,MILLICODE";
+#define MILLI_START 10
+
+static void
+import_milli (enum millicodes code)
+{
+  char str[sizeof (import_string)];
+
+  if (!imported[(int) code])
+    {
+      imported[(int) code] = 1;
+      strcpy (str, import_string);
+      strncpy (str + MILLI_START, milli_names[(int) code], 4);
+      output_asm_insn (str, 0);
+    }
+}
+
+/* The register constraints have put the operands and return value in
+   the proper registers.  */
+
+const char *
+output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
+{
+  import_milli (mulI);
+  return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
+}
+
+/* Emit the rtl for doing a division by a constant.  */
+
+/* Do magic division millicodes exist for this value?  */
+const int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
+
+/* We'll use an array to keep track of the magic millicodes and
+   whether or not we've used them already.  [n][0] is signed, [n][1] is
+   unsigned.  */
+
+static int div_milli[16][2];
+
+int
+emit_hpdiv_const (rtx *operands, int unsignedp)
+{
+  if (GET_CODE (operands[2]) == CONST_INT
+      && INTVAL (operands[2]) > 0
+      && INTVAL (operands[2]) < 16
+      && magic_milli[INTVAL (operands[2])])
+    {
+      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
+
+      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
+      emit
+        (gen_rtx_PARALLEL
+         (VOIDmode,
+          gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
+                                     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
+                                                     SImode,
+                                                     gen_rtx_REG (SImode, 26),
+                                                     operands[2])),
+                     gen_rtx_CLOBBER (VOIDmode, operands[4]),
+                     gen_rtx_CLOBBER (VOIDmode, operands[3]),
+                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
+                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
+                     gen_rtx_CLOBBER (VOIDmode, ret))));
+      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
+      return 1;
+    }
+  return 0;
+}
+
+const char *
+output_div_insn (rtx *operands, int unsignedp, rtx insn)
+{
+  int divisor;
+
+  /* If the divisor is a constant, try to use one of the special
+     opcodes.  */
+  if (GET_CODE (operands[0]) == CONST_INT)
+    {
+      static char buf[100];
+      divisor = INTVAL (operands[0]);
+      if (!div_milli[divisor][unsignedp])
+        {
+          div_milli[divisor][unsignedp] = 1;
+          if (unsignedp)
+            output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
+          else
+            output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
+        }
+      if (unsignedp)
+        {
+          sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
+                   INTVAL (operands[0]));
+          return output_millicode_call (insn,
+                                        gen_rtx_SYMBOL_REF (SImode, buf));
+        }
+      else
+        {
+          sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
+                   INTVAL (operands[0]));
+          return output_millicode_call (insn,
+                                        gen_rtx_SYMBOL_REF (SImode, buf));
+        }
+    }
+  /* Divisor isn't a special constant.  */
+  else
+    {
+      if (unsignedp)
+        {
+          import_milli (divU);
+          return output_millicode_call (insn,
+                                        gen_rtx_SYMBOL_REF (SImode, "$$divU"));
+        }
+      else
+        {
+          import_milli (divI);
+          return output_millicode_call (insn,
+                                        gen_rtx_SYMBOL_REF (SImode, "$$divI"));
+        }
+    }
+}
+
+/* Output a $$rem millicode to do mod.
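+   (Illustrative summary added here, not in the original source: a
+   signed `x % y' imports and calls $$remI; per the millicode
+   convention the dividend is assumed to be in %r26, the divisor in
+   %r25, and the remainder is returned in %r29.)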
*/ + +const char * +output_mod_insn (int unsignedp, rtx insn) +{ + if (unsignedp) + { + import_milli (remU); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$remU")); + } + else + { + import_milli (remI); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$remI")); + } +} + +void +output_arg_descriptor (rtx call_insn) +{ + const char *arg_regs[4]; + enum machine_mode arg_mode; + rtx link; + int i, output_flag = 0; + int regno; + + /* We neither need nor want argument location descriptors for the + 64bit runtime environment or the ELF32 environment. */ + if (TARGET_64BIT || TARGET_ELF32) + return; + + for (i = 0; i < 4; i++) + arg_regs[i] = 0; + + /* Specify explicitly that no argument relocations should take place + if using the portable runtime calling conventions. */ + if (TARGET_PORTABLE_RUNTIME) + { + fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", + asm_out_file); + return; + } + + gcc_assert (GET_CODE (call_insn) == CALL_INSN); + for (link = CALL_INSN_FUNCTION_USAGE (call_insn); + link; link = XEXP (link, 1)) + { + rtx use = XEXP (link, 0); + + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + if (regno >= 23 && regno <= 26) + { + arg_regs[26 - regno] = "GR"; + if (arg_mode == DImode) + arg_regs[25 - regno] = "GR"; + } + else if (regno >= 32 && regno <= 39) + { + if (arg_mode == SFmode) + arg_regs[(regno - 32) / 2] = "FR"; + else + { +#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED + arg_regs[(regno - 34) / 2] = "FR"; + arg_regs[(regno - 34) / 2 + 1] = "FU"; +#else + arg_regs[(regno - 34) / 2] = "FU"; + arg_regs[(regno - 34) / 2 + 1] = "FR"; +#endif + } + } + } + fputs ("\t.CALL ", asm_out_file); + for (i = 0; i < 4; i++) + { + if (arg_regs[i]) + { + if (output_flag++) + fputc (',', asm_out_file); + fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); + } + } + fputc ('\n', asm_out_file); +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. */ + +static reg_class_t +pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + int regno; + enum reg_class rclass = (enum reg_class) rclass_i; + + /* Handle the easy stuff first. */ + if (rclass == R1_REGS) + return NO_REGS; + + if (REG_P (x)) + { + regno = REGNO (x); + if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER) + return NO_REGS; + } + else + regno = -1; + + /* If we have something like (mem (mem (...)), we can safely assume the + inner MEM will end up in a general register after reloading, so there's + no need for a secondary reload. */ + if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM) + return NO_REGS; + + /* Trying to load a constant into a FP register during PIC code + generation requires %r1 as a scratch register. */ + if (flag_pic + && (mode == SImode || mode == DImode) + && FP_REG_CLASS_P (rclass) + && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)) + { + sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1 + : CODE_FOR_reload_indi_r1); + return NO_REGS; + } + + /* Secondary reloads of symbolic operands require %r1 as a scratch + register when we're generating PIC code and when the operand isn't + readonly. 
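+     (Illustrative note added here, not in the original source:
+     loading the address of a writable global under PIC first needs
+     the DLT entry address formed in %r1; the reload_insi_r1 and
+     reload_indi_r1 patterns requested below arrange that scratch
+     use.)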
*/
+  if (symbolic_expression_p (x))
+    {
+      if (GET_CODE (x) == HIGH)
+        x = XEXP (x, 0);
+
+      if (flag_pic || !read_only_operand (x, VOIDmode))
+        {
+          gcc_assert (mode == SImode || mode == DImode);
+          sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
+                        : CODE_FOR_reload_indi_r1);
+          return NO_REGS;
+        }
+    }
+
+  /* Profiling showed the PA port spends about 1.3% of its compilation
+     time in true_regnum from calls inside pa_secondary_reload_class.  */
+  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
+    regno = true_regnum (x);
+
+  /* In order to allow 14-bit displacements in integer loads and stores,
+     we need to prevent reload from generating out of range integer mode
+     loads and stores to the floating point registers.  Previously, we
+     used to call for a secondary reload and have emit_move_sequence()
+     fix the instruction sequence.  However, reload occasionally wouldn't
+     generate the reload and we would end up with an invalid REG+D memory
+     address.  So, now we use an intermediate general register for most
+     memory loads and stores.  */
+  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
+      && GET_MODE_CLASS (mode) == MODE_INT
+      && FP_REG_CLASS_P (rclass))
+    {
+      /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
+         the secondary reload needed for a pseudo.  It never passes a
+         REG+D address.  */
+      if (GET_CODE (x) == MEM)
+        {
+          x = XEXP (x, 0);
+
+          /* We don't need an intermediate for indexed and LO_SUM DLT
+             memory addresses.  When INT14_OK_STRICT is true, it might
+             appear that we could directly allow register indirect
+             memory addresses.  However, this doesn't work because we
+             don't support SUBREGs in floating-point register copies
+             and reload doesn't tell us when it's going to use a SUBREG.  */
+          if (IS_INDEX_ADDR_P (x)
+              || IS_LO_SUM_DLT_ADDR_P (x))
+            return NO_REGS;
+
+          /* Otherwise, we need an intermediate general register.  */
+          return GENERAL_REGS;
+        }
+
+      /* Request a secondary reload with a general scratch register
+         for everything else.  ??? Could symbolic operands be handled
+         directly when generating non-pic PA 2.0 code?  */
+      sri->icode = (in_p
+                    ? direct_optab_handler (reload_in_optab, mode)
+                    : direct_optab_handler (reload_out_optab, mode));
+      return NO_REGS;
+    }
+
+  /* A SAR<->FP register copy requires an intermediate general register
+     and secondary memory.  We need a secondary reload with a general
+     scratch register for spills.  */
+  if (rclass == SHIFT_REGS)
+    {
+      /* Handle spill.  */
+      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
+        {
+          sri->icode = (in_p
+                        ? direct_optab_handler (reload_in_optab, mode)
+                        : direct_optab_handler (reload_out_optab, mode));
+          return NO_REGS;
+        }
+
+      /* Handle FP copy.  */
+      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
+        return GENERAL_REGS;
+    }
+
+  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
+      && REGNO_REG_CLASS (regno) == SHIFT_REGS
+      && FP_REG_CLASS_P (rclass))
+    return GENERAL_REGS;
+
+  return NO_REGS;
+}
+
+/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
+   is only marked as live on entry by df-scan when it is a fixed
+   register.  It isn't a fixed register in the 64-bit runtime,
+   so we need to mark it here.  */
+
+static void
+pa_extra_live_on_entry (bitmap regs)
+{
+  if (TARGET_64BIT)
+    bitmap_set_bit (regs, ARG_POINTER_REGNUM);
+}
+
+/* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
+   to prevent it from being deleted.  */
+
+rtx
+pa_eh_return_handler_rtx (void)
+{
+  rtx tmp;
+
+  tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
+                      TARGET_64BIT ?
GEN_INT (-16) : GEN_INT (-20)); + tmp = gen_rtx_MEM (word_mode, tmp); + tmp->volatil = 1; + return tmp; +} + +/* In the 32-bit runtime, arguments larger than eight bytes are passed + by invisible reference. As a GCC extension, we also pass anything + with a zero or variable size by reference. + + The 64-bit runtime does not describe passing any types by invisible + reference. The internals of GCC can't currently handle passing + empty structures, and zero or variable length arrays when they are + not passed entirely on the stack or by reference. Thus, as a GCC + extension, we pass these types by reference. The HP compiler doesn't + support these types, so hopefully there shouldn't be any compatibility + issues. This may have to be revisited when HP releases a C99 compiler + or updates the ABI. */ + +static bool +pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size; + + if (type) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + if (TARGET_64BIT) + return size <= 0; + else + return size <= 0 || size > 8; +} + +enum direction +function_arg_padding (enum machine_mode mode, const_tree type) +{ + if (mode == BLKmode + || (TARGET_64BIT + && type + && (AGGREGATE_TYPE_P (type) + || TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE))) + { + /* Return none if justification is not required. */ + if (type + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0) + return none; + + /* The directions set here are ignored when a BLKmode argument larger + than a word is placed in a register. Different code is used for + the stack and registers. This makes it difficult to have a + consistent data representation for both the stack and registers. + For both runtimes, the justification and padding for arguments on + the stack and in registers should be identical. */ + if (TARGET_64BIT) + /* The 64-bit runtime specifies left justification for aggregates. */ + return upward; + else + /* The 32-bit runtime architecture specifies right justification. + When the argument is passed on the stack, the argument is padded + with garbage on the left. The HP compiler pads with zeros. */ + return downward; + } + + if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY) + return downward; + else + return none; +} + + +/* Do what is necessary for `va_start'. We look at the current function + to determine if stdargs or varargs is used and fill in an initial + va_list. A pointer to this constructor is returned. */ + +static rtx +hppa_builtin_saveregs (void) +{ + rtx offset, dest; + tree fntype = TREE_TYPE (current_function_decl); + int argadj = ((!stdarg_p (fntype)) + ? UNITS_PER_WORD : 0); + + if (argadj) + offset = plus_constant (crtl->args.arg_offset_rtx, argadj); + else + offset = crtl->args.arg_offset_rtx; + + if (TARGET_64BIT) + { + int i, off; + + /* Adjust for varargs/stdarg differences. */ + if (argadj) + offset = plus_constant (crtl->args.arg_offset_rtx, -argadj); + else + offset = crtl->args.arg_offset_rtx; + + /* We need to save %r26 .. %r19 inclusive starting at offset -64 + from the incoming arg pointer and growing to larger addresses. */ + for (i = 26, off = -64; i >= 19; i--, off += 8) + emit_move_insn (gen_rtx_MEM (word_mode, + plus_constant (arg_pointer_rtx, off)), + gen_rtx_REG (word_mode, i)); + + /* The incoming args pointer points just beyond the flushback area; + normally this is not a serious concern. 
However, when we are doing + varargs/stdargs we want to make the arg pointer point to the start + of the incoming argument area. */ + emit_move_insn (virtual_incoming_args_rtx, + plus_constant (arg_pointer_rtx, -64)); + + /* Now return a pointer to the first anonymous argument. */ + return copy_to_reg (expand_binop (Pmode, add_optab, + virtual_incoming_args_rtx, + offset, 0, 0, OPTAB_LIB_WIDEN)); + } + + /* Store general registers on the stack. */ + dest = gen_rtx_MEM (BLKmode, + plus_constant (crtl->args.internal_arg_pointer, + -16)); + set_mem_alias_set (dest, get_varargs_alias_set ()); + set_mem_align (dest, BITS_PER_WORD); + move_block_from_reg (23, dest, 4); + + /* move_block_from_reg will emit code to store the argument registers + individually as scalar stores. + + However, other insns may later load from the same addresses for + a structure load (passing a struct to a varargs routine). + + The alias code assumes that such aliasing can never happen, so we + have to keep memory referencing insns from moving up beyond the + last argument register store. So we emit a blockage insn here. */ + emit_insn (gen_blockage ()); + + return copy_to_reg (expand_binop (Pmode, add_optab, + crtl->args.internal_arg_pointer, + offset, 0, 0, OPTAB_LIB_WIDEN)); +} + +static void +hppa_va_start (tree valist, rtx nextarg) +{ + nextarg = expand_builtin_saveregs (); + std_expand_builtin_va_start (valist, nextarg); +} + +static tree +hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + if (TARGET_64BIT) + { + /* Args grow upward. We can use the generic routines. */ + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + } + else /* !TARGET_64BIT */ + { + tree ptr = build_pointer_type (type); + tree valist_type; + tree t, u; + unsigned int size, ofs; + bool indirect; + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); + if (indirect) + { + type = ptr; + ptr = build_pointer_type (type); + } + size = int_size_in_bytes (type); + valist_type = TREE_TYPE (valist); + + /* Args grow down. Not handled by generic routines. */ + + u = fold_convert (sizetype, size_in_bytes (type)); + u = fold_build1 (NEGATE_EXPR, sizetype, u); + t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u); + + /* Align to 4 or 8 byte boundary depending on argument size. */ + + u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u); + t = fold_convert (valist_type, t); + + t = build2 (MODIFY_EXPR, valist_type, valist, t); + + ofs = (8 - size) % 4; + if (ofs != 0) + { + u = size_int (ofs); + t = build2 (POINTER_PLUS_EXPR, valist_type, t, u); + } + + t = fold_convert (ptr, t); + t = build_va_arg_indirect_ref (t); + + if (indirect) + t = build_va_arg_indirect_ref (t); + + return t; + } +} + +/* True if MODE is valid for the target. By "valid", we mean able to + be manipulated in non-trivial ways. In particular, this means all + the arithmetic is supported. + + Currently, TImode is not valid as the HP 64-bit runtime documentation + doesn't document the alignment and calling conventions for this type. + Thus, we return false when PRECISION is 2 * BITS_PER_WORD and + 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.
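+ For example, in the 32-bit runtime 2 * BITS_PER_WORD is 64 and equals
+ LONG_LONG_TYPE_SIZE, so DImode is supported; in the 64-bit runtime it
+ is 128, which matches none of the type sizes checked below, so TImode
+ is rejected.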
*/ + +static bool +pa_scalar_mode_supported_p (enum machine_mode mode) +{ + int precision = GET_MODE_PRECISION (mode); + + switch (GET_MODE_CLASS (mode)) + { + case MODE_PARTIAL_INT: + case MODE_INT: + if (precision == CHAR_TYPE_SIZE) + return true; + if (precision == SHORT_TYPE_SIZE) + return true; + if (precision == INT_TYPE_SIZE) + return true; + if (precision == LONG_TYPE_SIZE) + return true; + if (precision == LONG_LONG_TYPE_SIZE) + return true; + return false; + + case MODE_FLOAT: + if (precision == FLOAT_TYPE_SIZE) + return true; + if (precision == DOUBLE_TYPE_SIZE) + return true; + if (precision == LONG_DOUBLE_TYPE_SIZE) + return true; + return false; + + case MODE_DECIMAL_FLOAT: + return false; + + default: + gcc_unreachable (); + } +} + +/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and + it branches into the delay slot. Otherwise, return FALSE. */ + +static bool +branch_to_delay_slot_p (rtx insn) +{ + rtx jump_insn; + + if (dbr_sequence_length ()) + return FALSE; + + jump_insn = next_active_insn (JUMP_LABEL (insn)); + while (insn) + { + insn = next_active_insn (insn); + if (jump_insn == insn) + return TRUE; + + /* We can't rely on the length of asms. So, we return FALSE when + the branch is followed by an asm. */ + if (!insn + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || extract_asm_operands (PATTERN (insn)) != NULL_RTX + || get_attr_length (insn) > 0) + break; + } + + return FALSE; +} + +/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot. + + This occurs when INSN has an unfilled delay slot and is followed + by an asm. Disaster can occur if the asm is empty and the jump + branches into the delay slot. So, we add a nop in the delay slot + when this occurs. */ + +static bool +branch_needs_nop_p (rtx insn) +{ + rtx jump_insn; + + if (dbr_sequence_length ()) + return FALSE; + + jump_insn = next_active_insn (JUMP_LABEL (insn)); + while (insn) + { + insn = next_active_insn (insn); + if (!insn || jump_insn == insn) + return TRUE; + + if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT + || extract_asm_operands (PATTERN (insn)) != NULL_RTX) + && get_attr_length (insn) > 0) + break; + } + + return FALSE; +} + +/* Return TRUE if INSN, a forward jump insn, can use nullification + to skip the following instruction. This avoids an extra cycle due + to a mis-predicted branch when we fall through. */ + +static bool +use_skip_p (rtx insn) +{ + rtx jump_insn = next_active_insn (JUMP_LABEL (insn)); + + while (insn) + { + insn = next_active_insn (insn); + + /* We can't rely on the length of asms, so we can't skip asms. */ + if (!insn + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || extract_asm_operands (PATTERN (insn)) != NULL_RTX) + break; + if (get_attr_length (insn) == 4 + && jump_insn == next_active_insn (insn)) + return TRUE; + if (get_attr_length (insn) > 0) + break; + } + + return FALSE; +} + +/* This routine handles all the normal conditional branch sequences we + might need to generate. It handles compare immediate vs compare + register, nullification of delay slots, varying length branches, + negated branches, and all combinations of the above. It returns the + output appropriate to emit the branch corresponding to all given + parameters. */ + +const char * +output_cbranch (rtx *operands, int negated, rtx insn) +{ + static char buf[100]; + bool useskip; + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + int xdelay; + + /* A conditional branch to the following instruction (e.g. 
the delay slot) + is asking for a disaster. This can happen when not optimizing and + when jump optimization fails. + + While it is usually safe to emit nothing, this can fail if the + preceding instruction is a nullified branch with an empty delay + slot and the same branch target as this branch. We could check + for this but jump optimization should eliminate nop jumps. It + is always safe to emit a nop. */ + if (branch_to_delay_slot_p (insn)) + return "nop"; + + /* The doubleword form of the cmpib instruction doesn't have the LEU + and GTU conditions while the cmpb instruction does. Since we accept + zero for cmpb, we must ensure that we use cmpb for the comparison. */ + if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx) + operands[2] = gen_rtx_REG (DImode, 0); + if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx) + operands[1] = gen_rtx_REG (DImode, 0); + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with a + comclr instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; + + switch (length) + { + /* All short conditional branches except backwards with an unfilled + delay slot. */ + case 4: + if (useskip) + strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); + else + strcpy (buf, "{com%I2b,|cmp%I2b,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%B3"); + else + strcat (buf, "%S3"); + if (useskip) + strcat (buf, " %2,%r1,%%r0"); + else if (nullify) + { + if (branch_needs_nop_p (insn)) + strcat (buf, ",n %2,%r1,%0%#"); + else + strcat (buf, ",n %2,%r1,%0"); + } + else + strcat (buf, " %2,%r1,%0"); + break; + + /* All long conditionals. Note a short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "{com%I2b,|cmp%I2b,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%S3"); + else + strcat (buf, "%B3"); + strcat (buf, ",n %2,%r1,.+12\n\tb %0"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a comb;nop rather than comiclr;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! 
forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + { + strcpy (buf, "{com%I2b,|cmp%I2b,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%B3 %2,%r1,%0%#"); + else + strcat (buf, "%S3 %2,%r1,%0%#"); + } + else + { + strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%S3"); + else + strcat (buf, "%B3"); + if (nullify) + strcat (buf, " %2,%r1,%%r0\n\tb,n %0"); + else + strcat (buf, " %2,%r1,%%r0\n\tb %0"); + } + break; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. */ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + /* Create a reversed conditional branch which branches around + the following insns. */ + if (GET_MODE (operands[1]) != DImode) + { + if (nullify) + { + if (negated) + strcpy (buf, + "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}"); + } + else + { + if (negated) + strcpy (buf, + "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}"); + } + } + else + { + if (nullify) + { + if (negated) + strcpy (buf, + "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}"); + } + else + { + if (negated) + strcpy (buf, + "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}"); + } + } + + output_asm_insn (buf, operands); + return output_lbranch (operands[0], insn, xdelay); + } + return buf; +} + +/* This routine handles output of long unconditional branches that + exceed the maximum range of a simple branch instruction. Since + we don't have a register available for the branch, we save register + %r1 in the frame marker, load the branch destination DEST into %r1, + execute the branch, and restore %r1 in the delay slot of the branch. + + Since long branches may have an insn in the delay slot and the + delay slot is used to restore %r1, we in general need to extract + this insn and execute it before the branch. However, to facilitate + use of this function by conditional branches, we also provide an + option to not extract the delay insn so that it will be emitted + after the long branch. So, if there is an insn in the delay slot, + it is extracted if XDELAY is nonzero. + + The lengths of the various long-branch sequences are 20, 16 and 24 + bytes for the portable runtime, non-PIC and PIC cases, respectively. */ + +const char * +output_lbranch (rtx dest, rtx insn, int xdelay) +{ + rtx xoperands[2]; + + xoperands[0] = dest; + + /* First, free up the delay slot. */ + if (xdelay && dbr_sequence_length () != 0) + { + /* We can't handle a jump in the delay slot. 
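+ The delay insn is re-emitted by final_scan_insn below, ahead of the
+ branch sequence, and emitting a jump there would alter the control
+ flow.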
*/ + gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN); + + final_scan_insn (NEXT_INSN (insn), asm_out_file, + optimize, 0, NULL); + + /* Now delete the delay insn. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + } + + /* Output an insn to save %r1. The runtime documentation doesn't + specify whether the "Clean Up" slot in the caller's frame can + be clobbered by the callee. It isn't copied by HP's builtin + alloca, so this suggests that it can be clobbered if necessary. + The "Static Link" location is copied by HP builtin alloca, so + we avoid using it. Using the cleanup slot might be a problem + if we have to interoperate with languages that pass cleanup + information. However, it should be possible to handle these + situations with GCC's asm feature. + + The "Current RP" slot is reserved for the called procedure, so + we try to use it when we don't have a frame of our own. It's + rather unlikely that we won't have a frame when we need to emit + a very long branch. + + Really the way to go long term is a register scavenger; go to + the target of the jump and find a register which we can use + as a scratch to hold the value in %r1. Then, we wouldn't have + to free up the delay slot or clobber a slot that may be needed + for other purposes. */ + if (TARGET_64BIT) + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + /* Use the return pointer slot in the frame marker. */ + output_asm_insn ("std %%r1,-16(%%r30)", xoperands); + else + /* Use the slot at -40 in the frame marker since HP builtin + alloca doesn't copy it. */ + output_asm_insn ("std %%r1,-40(%%r30)", xoperands); + } + else + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + /* Use the return pointer slot in the frame marker. */ + output_asm_insn ("stw %%r1,-20(%%r30)", xoperands); + else + /* Use the "Clean Up" slot in the frame marker. In GCC, + the only other use of this location is for copying a + floating point double argument from a floating-point + register to two general registers. The copy is done + as an "atomic" operation when outputting a call, so it + won't interfere with our using the location here. */ + output_asm_insn ("stw %%r1,-12(%%r30)", xoperands); + } + + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn ("ldil L'%0,%%r1", xoperands); + output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + else if (flag_pic) + { + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + if (TARGET_SOM || !TARGET_GAS) + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); + } + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + else + /* Now output a very long branch to the original target. */ + output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands); + + /* Now restore the value of %r1 in the delay slot. */ + if (TARGET_64BIT) + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + return "ldd -16(%%r30),%%r1"; + else + return "ldd -40(%%r30),%%r1"; + } + else + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + return "ldw -20(%%r30),%%r1"; + else + return "ldw -12(%%r30),%%r1"; + } +} + +/* This routine handles all the branch-on-bit conditional branch sequences we + might need to generate.
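+ These are the bb instructions, which test a single fixed bit position
+ in a register and branch on its value.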
It handles nullification of delay slots, + varying length branches, negated branches and all combinations of the + above. It returns the appropriate output template to emit the branch. */ + +const char * +output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which) +{ + static char buf[100]; + bool useskip; + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + int xdelay; + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. I do not think this can happen as this pattern + is only used when optimizing; jump optimization should eliminate the + jump. But be prepared just in case. */ + + if (branch_to_delay_slot_p (insn)) + return "nop"; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with an + extrs instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; + + switch (length) + { + + /* All short conditional branches except backwards with an unfilled + delay slot. */ + case 4: + if (useskip) + strcpy (buf, "{extrs,|extrw,s,}"); + else + strcpy (buf, "bb,"); + if (useskip && GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + else if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (useskip) + strcat (buf, " %0,%1,1,%%r0"); + else if (nullify && negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, ",n %0,%1,%3%#"); + else + strcat (buf, ",n %0,%1,%3"); + } + else if (nullify && ! negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, ",n %0,%1,%2%#"); + else + strcat (buf, ",n %0,%1,%2"); + } + else if (! nullify && negated) + strcat (buf, " %0,%1,%3"); + else if (! nullify && ! negated) + strcat (buf, " %0,%1,%2"); + break; + + /* All long conditionals. Note a short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "bb,"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (negated) + strcat (buf, ",n %0,%1,.+12\n\tb %3"); + else + strcat (buf, ",n %0,%1,.+12\n\tb %2"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a bb;nop rather than extrs;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + { + strcpy (buf, "bb,"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && !
negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (negated) + strcat (buf, " %0,%1,%3%#"); + else + strcat (buf, " %0,%1,%2%#"); + } + else + { + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + else + strcpy (buf, "{extrs,|extrw,s,}"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify && negated) + strcat (buf, " %0,%1,1,%%r0\n\tb,n %3"); + else if (nullify && ! negated) + strcat (buf, " %0,%1,1,%%r0\n\tb,n %2"); + else if (negated) + strcat (buf, " %0,%1,1,%%r0\n\tb %3"); + else + strcat (buf, " %0,%1,1,%%r0\n\tb %2"); + } + break; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. */ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + else + strcpy (buf, "bb,"); + if ((which == 0 && negated) + || (which == 1 && !negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify) + strcat (buf, ",n %0,%1,.+%4"); + else + strcat (buf, " %0,%1,.+%4"); + output_asm_insn (buf, operands); + return output_lbranch (negated ? operands[3] : operands[2], + insn, xdelay); + } + return buf; +} + +/* This routine handles all the branch-on-variable-bit conditional branch + sequences we might need to generate. It handles nullification of delay + slots, varying length branches, negated branches and all combinations + of the above. It returns the appropriate output template to emit the + branch. */ + +const char * +output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which) +{ + static char buf[100]; + bool useskip; + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + int xdelay; + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. I do not think this can happen as this pattern + is only used when optimizing; jump optimization should eliminate the + jump. But be prepared just in case. */ + + if (branch_to_delay_slot_p (insn)) + return "nop"; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with an + extrs instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; + + switch (length) + { + + /* All short conditional branches except backwards with an unfilled + delay slot.
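+ The bvb form implicitly tests the bit selected by the shift amount
+ register, while the corresponding PA 2.0 bb form names %sar
+ explicitly as an operand.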
*/ + case 4: + if (useskip) + strcpy (buf, "{vextrs,|extrw,s,}"); + else + strcpy (buf, "{bvb,|bb,}"); + if (useskip && GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + else if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (useskip) + strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}"); + else if (nullify && negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}"); + else + strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}"); + } + else if (nullify && ! negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}"); + else + strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}"); + } + else if (! nullify && negated) + strcat (buf, "{ %0,%3| %0,%%sar,%3}"); + else if (! nullify && ! negated) + strcat (buf, "{ %0,%2| %0,%%sar,%2}"); + break; + + /* All long conditionals. Note a short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "{bvb,|bb,}"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (negated) + strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}"); + else + strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a bb;nop rather than extrs;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + { + strcpy (buf, "{bvb,|bb,}"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (negated) + strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}"); + else + strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}"); + } + else + { + strcpy (buf, "{vextrs,|extrw,s,}"); + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify && negated) + strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}"); + else if (nullify && ! negated) + strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}"); + else if (negated) + strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}"); + else + strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}"); + } + break; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. 
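+ As an illustration (operand placeholders as in the templates below),
+ the nullified word-mode GAS form is
+
+ bb,>=,n %0,%sar,.+%4
+
+ followed by the long branch emitted by output_lbranch.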
*/ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + else + strcpy (buf, "{bvb,|bb,}"); + if ((which == 0 && negated) + || (which == 1 && !negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify) + strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}"); + else + strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}"); + output_asm_insn (buf, operands); + return output_lbranch (negated ? operands[3] : operands[2], + insn, xdelay); + } + return buf; +} + +/* Return the output template for emitting a dbra type insn. + + Note it may perform some output operations on its own before + returning the final output string. */ +const char * +output_dbra (rtx *operands, rtx insn, int which_alternative) +{ + int length = get_attr_length (insn); + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. Be prepared! */ + + if (branch_to_delay_slot_p (insn)) + { + if (which_alternative == 0) + return "ldo %1(%0),%0"; + else if (which_alternative == 1) + { + output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands); + output_asm_insn ("ldw -16(%%r30),%4", operands); + output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); + return "{fldws|fldw} -16(%%r30),%0"; + } + else + { + output_asm_insn ("ldw %0,%4", operands); + return "ldo %1(%4),%4\n\tstw %4,%0"; + } + } + + if (which_alternative == 0) + { + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int xdelay; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + switch (length) + { + case 4: + if (nullify) + { + if (branch_needs_nop_p (insn)) + return "addib,%C2,n %1,%0,%3%#"; + else + return "addib,%C2,n %1,%0,%3"; + } + else + return "addib,%C2 %1,%0,%3"; + + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + return "addib,%N2,n %1,%0,.+12\n\tb %3"; + /* Handle short backwards branch with an unfilled delay slot. + Using an addb;nop rather than addi;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + return "addib,%C2 %1,%0,%3%#"; + + /* Handle normal cases. */ + if (nullify) + return "addi,%N2 %1,%0,%0\n\tb,n %3"; + else + return "addi,%N2 %1,%0,%0\n\tb %3"; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified.
*/ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (nullify) + output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands); + else + output_asm_insn ("addib,%N2 %1,%0,.+%4", operands); + + return output_lbranch (operands[3], insn, xdelay); + } + + } + /* Deal with gross reload from FP register case. */ + else if (which_alternative == 1) + { + /* Move loop counter from FP register to MEM then into a GR, + increment the GR, store the GR into MEM, and finally reload + the FP register from MEM from within the branch's delay slot. */ + output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4", + operands); + output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); + if (length == 24) + return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0"; + else if (length == 28) + return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; + else + { + operands[5] = GEN_INT (length - 16); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands); + output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); + return output_lbranch (operands[3], insn, 0); + } + } + /* Deal with gross reload from memory case. */ + else + { + /* Reload loop counter from memory, the store back to memory + happens in the branch's delay slot. */ + output_asm_insn ("ldw %0,%4", operands); + if (length == 12) + return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; + else if (length == 16) + return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0"; + else + { + operands[5] = GEN_INT (length - 4); + output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands); + return output_lbranch (operands[3], insn, 0); + } + } +} + +/* Return the output template for emitting a movb type insn. + + Note it may perform some output operations on its own before + returning the final output string. */ +const char * +output_movb (rtx *operands, rtx insn, int which_alternative, + int reverse_comparison) +{ + int length = get_attr_length (insn); + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. Be prepared! */ + + if (branch_to_delay_slot_p (insn)) + { + if (which_alternative == 0) + return "copy %1,%0"; + else if (which_alternative == 1) + { + output_asm_insn ("stw %1,-16(%%r30)", operands); + return "{fldws|fldw} -16(%%r30),%0"; + } + else if (which_alternative == 2) + return "stw %1,%0"; + else + return "mtsar %r1"; + } + + /* Support the second variant. */ + if (reverse_comparison) + PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); + + if (which_alternative == 0) + { + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int xdelay; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + switch (length) + { + case 4: + if (nullify) + { + if (branch_needs_nop_p (insn)) + return "movb,%C2,n %1,%0,%3%#"; + else + return "movb,%C2,n %1,%0,%3"; + } + else + return "movb,%C2 %1,%0,%3"; + + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. 
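+ The negated movb below targets .+12 so that it skips both the b insn
+ and the filled delay slot that follows it (4 bytes for each of the
+ movb, the b and the delay insn).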
*/ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + return "movb,%N2,n %1,%0,.+12\n\tb %3"; + + /* Handle short backwards branch with an unfilled delay slot. + Using a movb;nop rather than or;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + return "movb,%C2 %1,%0,%3%#"; + /* Handle normal cases. */ + if (nullify) + return "or,%N2 %1,%%r0,%0\n\tb,n %3"; + else + return "or,%N2 %1,%%r0,%0\n\tb %3"; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. */ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (nullify) + output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands); + else + output_asm_insn ("movb,%N2 %1,%0,.+%4", operands); + + return output_lbranch (operands[3], insn, xdelay); + } + } + /* Deal with gross reload for FP destination register case. */ + else if (which_alternative == 1) + { + /* Move source register to MEM, perform the branch test, then + finally load the FP register from MEM from within the branch's + delay slot. */ + output_asm_insn ("stw %1,-16(%%r30)", operands); + if (length == 12) + return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0"; + else if (length == 16) + return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; + else + { + operands[4] = GEN_INT (length - 4); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands); + output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); + return output_lbranch (operands[3], insn, 0); + } + } + /* Deal with gross reload from memory case. */ + else if (which_alternative == 2) + { + /* Reload loop counter from memory, the store back to memory + happens in the branch's delay slot. */ + if (length == 8) + return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0"; + else if (length == 12) + return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0"; + else + { + operands[4] = GEN_INT (length); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0", + operands); + return output_lbranch (operands[3], insn, 0); + } + } + /* Handle SAR as a destination. */ + else + { + if (length == 8) + return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1"; + else if (length == 12) + return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1"; + else + { + operands[4] = GEN_INT (length); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1", + operands); + return output_lbranch (operands[3], insn, 0); + } + } +} + +/* Copy any FP arguments in INSN into integer registers. */ +static void +copy_fp_args (rtx insn) +{ + rtx link; + rtx xoperands[2]; + + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + { + int arg_mode, regno; + rtx use = XEXP (link, 0); + + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + + /* Is it a floating point register? 
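+ Registers 32 to 39 are the FP argument registers; two of them
+ correspond to each general argument register slot, hence the
+ (regno - 32) / 2 scaling in the general register numbers below.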
*/ + if (regno >= 32 && regno <= 39) + { + /* Copy the FP register into an integer register via memory. */ + if (arg_mode == SFmode) + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); + output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + else + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); + output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + } + } +} + +/* Compute length of the FP argument copy sequence for INSN. */ +static int +length_fp_args (rtx insn) +{ + int length = 0; + rtx link; + + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + { + int arg_mode, regno; + rtx use = XEXP (link, 0); + + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + + /* Is it a floating point register? */ + if (regno >= 32 && regno <= 39) + { + if (arg_mode == SFmode) + length += 8; + else + length += 12; + } + } + + return length; +} + +/* Return the attribute length for the millicode call instruction INSN. + The length must match the code generated by output_millicode_call. + We include the delay slot in the returned length as it is better to + overestimate the length than to underestimate it. */ + +int +attr_length_millicode_call (rtx insn) +{ + unsigned long distance = -1; + unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; + + if (INSN_ADDRESSES_SET_P ()) + { + distance = (total + insn_current_reference_address (insn)); + if (distance < total) + distance = -1; + } + + if (TARGET_64BIT) + { + if (!TARGET_LONG_CALLS && distance < 7600000) + return 8; + + return 20; + } + else if (TARGET_PORTABLE_RUNTIME) + return 24; + else + { + if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET) + return 8; + + if (TARGET_LONG_ABS_CALL && !flag_pic) + return 12; + + return 24; + } +} + +/* INSN is a function call. It may have an unconditional jump + in its delay slot. + + CALL_DEST is the routine we are calling. */ + +const char * +output_millicode_call (rtx insn, rtx call_dest) +{ + int attr_length = get_attr_length (insn); + int seq_length = dbr_sequence_length (); + int distance; + rtx seq_insn; + rtx xoperands[3]; + + xoperands[0] = call_dest; + xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31); + + /* Handle the common case where we are sure that the branch will + reach the beginning of the $CODE$ subspace. The within reach + form of the $$sh_func_adrs call has a length of 28. Because it + has an attribute type of sh_func_adrs, it never has a nonzero + sequence length (i.e., the delay slot is never filled). */ + if (!TARGET_LONG_CALLS + && (attr_length == 8 + || (attr_length == 28 + && get_attr_type (insn) == TYPE_SH_FUNC_ADRS))) + { + output_asm_insn ("{bl|b,l} %0,%2", xoperands); + } + else + { + if (TARGET_64BIT) + { + /* It might seem that one insn could be saved by accessing + the millicode function using the linkage table. However, + this doesn't work in shared libraries and other dynamically + loaded objects. Using a pc-relative sequence also avoids + problems related to the implicit use of the gp register.
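+ The b,l .+8 below loads a known instruction address into %r1, and
+ the addil/ldo pair then applies the pc-relative offset of the
+ millicode routine to form its address.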
*/ + output_asm_insn ("b,l .+8,%%r1", xoperands); + + if (TARGET_GAS) + { + output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); + } + else + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); + } + + output_asm_insn ("bve,l (%%r1),%%r2", xoperands); + } + else if (TARGET_PORTABLE_RUNTIME) + { + /* Pure portable runtime doesn't allow be/ble; we also don't + have PIC support in the assembler/linker, so this sequence + is needed. */ + + /* Get the address of our target into %r1. */ + output_asm_insn ("ldil L'%0,%%r1", xoperands); + output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); + + /* Get our return address into %r31. */ + output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands); + output_asm_insn ("addi 8,%%r31,%%r31", xoperands); + + /* Jump to our target address in %r1. */ + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + else if (!flag_pic) + { + output_asm_insn ("ldil L'%0,%%r1", xoperands); + if (TARGET_PA_20) + output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands); + else + output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); + } + else + { + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + output_asm_insn ("addi 16,%%r1,%%r31", xoperands); + + if (TARGET_SOM || !TARGET_GAS) + { + /* The HP assembler can generate relocations for the + difference of two symbols. GAS can do this for a + millicode symbol but not an arbitrary external + symbol when generating SOM output. */ + xoperands[1] = gen_label_rtx (); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); + output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1", + xoperands); + } + + /* Jump to our target address in %r1. */ + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + } + + if (seq_length == 0) + output_asm_insn ("nop", xoperands); + + /* We are done if there isn't a jump in the delay slot. */ + if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) + return ""; + + /* This call has an unconditional jump in its delay slot. */ + xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + + /* See if the return address can be adjusted. Use the containing + sequence insn's address. */ + if (INSN_ADDRESSES_SET_P ()) + { + seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); + distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) + - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); + + if (VAL_14_BITS_P (distance)) + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("ldo %0-%1(%2),%2", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + } + else + /* ??? This branch may not reach its target. */ + output_asm_insn ("nop\n\tb,n %0", xoperands); + } + else + /* ??? This branch may not reach its target. */ + output_asm_insn ("nop\n\tb,n %0", xoperands); + + /* Delete the jump. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + + return ""; +} + +/* Return the attribute length of the call instruction INSN. The SIBCALL + flag indicates whether INSN is a regular call or a sibling call. 
The + length returned must be longer than the code actually generated by + output_call. Since branch shortening is done before delay branch + sequencing, there is no way to determine whether or not the delay + slot will be filled during branch shortening. Even when the delay + slot is filled, we may have to add a nop if the delay slot contains + a branch that can't reach its target. Thus, we always have to include + the delay slot in the length estimate. This used to be done in + pa_adjust_insn_length but we do it here now as some sequences always + fill the delay slot and we can save four bytes in the estimate for + these sequences. */ + +int +attr_length_call (rtx insn, int sibcall) +{ + int local_call; + rtx call, call_dest; + tree call_decl; + int length = 0; + rtx pat = PATTERN (insn); + unsigned long distance = -1; + + gcc_assert (GET_CODE (insn) == CALL_INSN); + + if (INSN_ADDRESSES_SET_P ()) + { + unsigned long total; + + total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; + distance = (total + insn_current_reference_address (insn)); + if (distance < total) + distance = -1; + } + + gcc_assert (GET_CODE (pat) == PARALLEL); + + /* Get the call rtx. */ + call = XVECEXP (pat, 0, 0); + if (GET_CODE (call) == SET) + call = SET_SRC (call); + + gcc_assert (GET_CODE (call) == CALL); + + /* Determine if this is a local call. */ + call_dest = XEXP (XEXP (call, 0), 0); + call_decl = SYMBOL_REF_DECL (call_dest); + local_call = call_decl && targetm.binds_local_p (call_decl); + + /* pc-relative branch. */ + if (!TARGET_LONG_CALLS + && ((TARGET_PA_20 && !sibcall && distance < 7600000) + || distance < MAX_PCREL17F_OFFSET)) + length += 8; + + /* 64-bit plabel sequence. */ + else if (TARGET_64BIT && !local_call) + length += sibcall ? 28 : 24; + + /* non-pic long absolute branch sequence. */ + else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) + length += 12; + + /* long pc-relative branch sequence. */ + else if (TARGET_LONG_PIC_SDIFF_CALL + || (TARGET_GAS && !TARGET_SOM + && (TARGET_LONG_PIC_PCREL_CALL || local_call))) + { + length += 20; + + if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) + length += 8; + } + + /* 32-bit plabel sequence. */ + else + { + length += 32; + + if (TARGET_SOM) + length += length_fp_args (insn); + + if (flag_pic) + length += 4; + + if (!TARGET_PA_20) + { + if (!sibcall) + length += 8; + + if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) + length += 8; + } + } + + return length; +} + +/* INSN is a function call. It may have an unconditional jump + in its delay slot. + + CALL_DEST is the routine we are calling. */ + +const char * +output_call (rtx insn, rtx call_dest, int sibcall) +{ + int delay_insn_deleted = 0; + int delay_slot_filled = 0; + int seq_length = dbr_sequence_length (); + tree call_decl = SYMBOL_REF_DECL (call_dest); + int local_call = call_decl && targetm.binds_local_p (call_decl); + rtx xoperands[2]; + + xoperands[0] = call_dest; + + /* Handle the common case where we're sure that the branch will reach + the beginning of the "$CODE$" subspace. This is the beginning of + the current function if we are in a named section. */ + if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8) + { + xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2); + output_asm_insn ("{bl|b,l} %0,%1", xoperands); + } + else + { + if (TARGET_64BIT && !local_call) + { + /* ??? As far as I can tell, the HP linker doesn't support the + long pc-relative sequence described in the 64-bit runtime + architecture. 
So, we use a slightly longer indirect call. */ + xoperands[0] = get_deferred_plabel (call_dest); + xoperands[1] = gen_label_rtx (); + + /* If this isn't a sibcall, we put the load of %r27 into the + delay slot. We can't do this in a sibcall as we don't + have a second call-clobbered scratch register available. */ + if (seq_length != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN + && !sibcall) + { + final_scan_insn (NEXT_INSN (insn), asm_out_file, + optimize, 0, NULL); + + /* Now delete the delay insn. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + delay_insn_deleted = 1; + } + + output_asm_insn ("addil LT'%0,%%r27", xoperands); + output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); + output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); + + if (sibcall) + { + output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); + output_asm_insn ("ldd 16(%%r1),%%r1", xoperands); + output_asm_insn ("bve (%%r1)", xoperands); + } + else + { + output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); + output_asm_insn ("bve,l (%%r2),%%r2", xoperands); + output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); + delay_slot_filled = 1; + } + } + else + { + int indirect_call = 0; + + /* Emit a long call. There are several different sequences + of increasing length and complexity. In most cases, + they don't allow an instruction in the delay slot. */ + if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) + && !TARGET_LONG_PIC_SDIFF_CALL + && !(TARGET_GAS && !TARGET_SOM + && (TARGET_LONG_PIC_PCREL_CALL || local_call)) + && !TARGET_64BIT) + indirect_call = 1; + + if (seq_length != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN + && !sibcall + && (!TARGET_PA_20 + || indirect_call + || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic))) + { + /* A non-jump insn in the delay slot. By definition we can + emit this insn before the call (and in fact before argument + relocating). */ + final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, + NULL); + + /* Now delete the delay insn. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + delay_insn_deleted = 1; + } + + if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) + { + /* This is the best sequence for making long calls in + non-pic code. Unfortunately, GNU ld doesn't provide + the stub needed for external calls, and GAS's support + for this with the SOM linker is buggy. It is safe + to use this for local calls. */ + output_asm_insn ("ldil L'%0,%%r1", xoperands); + if (sibcall) + output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands); + else + { + if (TARGET_PA_20) + output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", + xoperands); + else + output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); + + output_asm_insn ("copy %%r31,%%r2", xoperands); + delay_slot_filled = 1; + } + } + else + { + if (TARGET_LONG_PIC_SDIFF_CALL) + { + /* The HP assembler and linker can handle relocations + for the difference of two symbols. The HP assembler + recognizes the sequence as a pc-relative call and + the linker provides stubs when needed. */ + xoperands[1] = gen_label_rtx (); + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); + } + else if (TARGET_GAS && !TARGET_SOM + && (TARGET_LONG_PIC_PCREL_CALL || local_call)) + { + /* GAS currently can't generate the relocations that + are needed for the SOM linker under HP-UX using this + sequence.
The GNU linker doesn't generate the stubs + that are needed for external calls on TARGET_ELF32 + with this sequence. For now, we have to use a + longer plabel sequence when using GAS. */ + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", + xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", + xoperands); + } + else + { + /* Emit a long plabel-based call sequence. This is + essentially an inline implementation of $$dyncall. + We don't actually try to call $$dyncall as this is + as difficult as calling the function itself. */ + xoperands[0] = get_deferred_plabel (call_dest); + xoperands[1] = gen_label_rtx (); + + /* Since the call is indirect, FP arguments in registers + need to be copied to the general registers. Then, the + argument relocation stub will copy them back. */ + if (TARGET_SOM) + copy_fp_args (insn); + + if (flag_pic) + { + output_asm_insn ("addil LT'%0,%%r19", xoperands); + output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); + output_asm_insn ("ldw 0(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil LR'%0-$global$,%%r27", + xoperands); + output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1", + xoperands); + } + + output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r1", xoperands); + output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands); + + if (!sibcall && !TARGET_PA_20) + { + output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands); + if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) + output_asm_insn ("addi 8,%%r2,%%r2", xoperands); + else + output_asm_insn ("addi 16,%%r2,%%r2", xoperands); + } + } + + if (TARGET_PA_20) + { + if (sibcall) + output_asm_insn ("bve (%%r1)", xoperands); + else + { + if (indirect_call) + { + output_asm_insn ("bve,l (%%r1),%%r2", xoperands); + output_asm_insn ("stw %%r2,-24(%%sp)", xoperands); + delay_slot_filled = 1; + } + else + output_asm_insn ("bve,l (%%r1),%%r2", xoperands); + } + } + else + { + if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) + output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0", + xoperands); + + if (sibcall) + { + if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) + output_asm_insn ("be 0(%%sr4,%%r1)", xoperands); + else + output_asm_insn ("be 0(%%sr0,%%r1)", xoperands); + } + else + { + if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) + output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands); + else + output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands); + + if (indirect_call) + output_asm_insn ("stw %%r31,-24(%%sp)", xoperands); + else + output_asm_insn ("copy %%r31,%%r2", xoperands); + delay_slot_filled = 1; + } + } + } + } + } + + if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted)) + output_asm_insn ("nop", xoperands); + + /* We are done if there isn't a jump in the delay slot. */ + if (seq_length == 0 + || delay_insn_deleted + || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) + return ""; + + /* A sibcall should never have a branch in the delay slot. */ + gcc_assert (!sibcall); + + /* This call has an unconditional jump in its delay slot. */ + xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + + if (!delay_slot_filled && INSN_ADDRESSES_SET_P ()) + { + /* See if the return address can be adjusted. Use the containing + sequence insn's address. This would break the regular call/return + relationship assumed by the table-based eh unwinder, so only do that + if the call is not possibly throwing.
*/ + rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); + int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) + - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); + + if (VAL_14_BITS_P (distance) + && !(can_throw_internal (insn) || can_throw_external (insn))) + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + } + else + output_asm_insn ("nop\n\tb,n %0", xoperands); + } + else + output_asm_insn ("b,n %0", xoperands); + + /* Delete the jump. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + + return ""; +} + +/* Return the attribute length of the indirect call instruction INSN. + The length must match the code generated by output_indirect_call. + The returned length includes the delay slot. Currently, the delay + slot of an indirect call sequence is not exposed and it is used by + the sequence itself. */ + +int +attr_length_indirect_call (rtx insn) +{ + unsigned long distance = -1; + unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; + + if (INSN_ADDRESSES_SET_P ()) + { + distance = (total + insn_current_reference_address (insn)); + if (distance < total) + distance = -1; + } + + if (TARGET_64BIT) + return 12; + + if (TARGET_FAST_INDIRECT_CALLS + || (!TARGET_PORTABLE_RUNTIME + && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) + || distance < MAX_PCREL17F_OFFSET))) + return 8; + + if (flag_pic) + return 24; + + if (TARGET_PORTABLE_RUNTIME) + return 20; + + /* Out of reach, can use ble. */ + return 12; +} + +const char * +output_indirect_call (rtx insn, rtx call_dest) +{ + rtx xoperands[1]; + + if (TARGET_64BIT) + { + xoperands[0] = call_dest; + output_asm_insn ("ldd 16(%0),%%r2", xoperands); + output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands); + return ""; + } + + /* First the special case for kernels, level 0 systems, etc. */ + if (TARGET_FAST_INDIRECT_CALLS) + return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; + + /* Now the normal case -- we can reach $$dyncall directly or + we're sure that we can get there via a long-branch stub. + + No need to check target flags as the length uniquely identifies + the remaining cases. */ + if (attr_length_indirect_call (insn) == 8) + { + /* The HP linker sometimes substitutes a BLE for BL/B,L calls to + $$dyncall. Since BLE uses %r31 as the link register, the 22-bit + variant of the B,L instruction can't be used on the SOM target. */ + if (TARGET_PA_20 && !TARGET_SOM) + return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31"; + else + return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2"; + } + + /* Long millicode call, but we are not generating PIC or portable runtime + code. */ + if (attr_length_indirect_call (insn) == 12) + return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; + + /* Long millicode call for portable runtime. */ + if (attr_length_indirect_call (insn) == 20) + return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop"; + + /* We need a long PIC call to $$dyncall.
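+ The sequence below forms the pc-relative address of $$dyncall in
+ %r1, sets up the return link in %r2 with blr, and then branches
+ through %r1.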
*/ + xoperands[0] = NULL_RTX; + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + if (TARGET_SOM || !TARGET_GAS) + { + xoperands[0] = gen_label_rtx (); + output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[0])); + output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1", + xoperands); + } + output_asm_insn ("blr %%r0,%%r2", xoperands); + output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands); + return ""; +} + +/* Return the total length of the save and restore instructions needed for + the data linkage table pointer (i.e., the PIC register) across the call + instruction INSN. No-return calls do not require a save and restore. + In addition, we may be able to avoid the save and restore for calls + within the same translation unit. */ + +int +attr_length_save_restore_dltp (rtx insn) +{ + if (find_reg_note (insn, REG_NORETURN, NULL_RTX)) + return 0; + + return 8; +} + +/* In HPUX 8.0's shared library scheme, special relocations are needed + for function labels if they might be passed to a function + in a shared library (because shared libraries don't live in code + space), and special magic is needed to construct their address. */ + +void +hppa_encode_label (rtx sym) +{ + const char *str = XSTR (sym, 0); + int len = strlen (str) + 1; + char *newstr, *p; + + p = newstr = XALLOCAVEC (char, len + 1); + *p++ = '@'; + strcpy (p, str); + + XSTR (sym, 0) = ggc_alloc_string (newstr, len); +} + +static void +pa_encode_section_info (tree decl, rtx rtl, int first) +{ + int old_referenced = 0; + + if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF) + old_referenced + = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED; + + default_encode_section_info (decl, rtl, first); + + if (first && TEXT_SPACE_P (decl)) + { + SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; + if (TREE_CODE (decl) == FUNCTION_DECL) + hppa_encode_label (XEXP (rtl, 0)); + } + else if (old_referenced) + SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced; +} + +/* This is sort of inverse to pa_encode_section_info. */ + +static const char * +pa_strip_name_encoding (const char *str) +{ + str += (*str == '@'); + str += (*str == '*'); + return str; +} + +int +function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0)); +} + +/* Returns 1 if OP is a function label involved in a simple addition + with a constant. Used to keep certain patterns from matching + during instruction combination. */ +int +is_function_label_plus_const (rtx op) +{ + /* Strip off any CONST. */ + if (GET_CODE (op) == CONST) + op = XEXP (op, 0); + + return (GET_CODE (op) == PLUS + && function_label_operand (XEXP (op, 0), Pmode) + && GET_CODE (XEXP (op, 1)) == CONST_INT); +} + +/* Output assembly code for a thunk to FUNCTION. 
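+ The thunk adds DELTA to the incoming object pointer in %r26 and
+ then transfers control to FUNCTION, which returns directly to the
+ thunk's caller.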
*/
+
+static void
+pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree function)
+{
+ static unsigned int current_thunk_number;
+ int val_14 = VAL_14_BITS_P (delta);
+ unsigned int old_last_address = last_address, nbytes = 0;
+ char label[16];
+ rtx xoperands[4];
+
+ xoperands[0] = XEXP (DECL_RTL (function), 0);
+ xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
+ xoperands[2] = GEN_INT (delta);
+
+ ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
+ fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
+
+ /* Output the thunk. We know that the function is in the same
+ translation unit (i.e., the same space) as the thunk, and that
+ thunks are output after their method. Thus, we don't need an
+ external branch to reach the function. With SOM and GAS,
+ functions and thunks are effectively in different sections.
+ Thus, we can always use an IA-relative branch and the linker
+ will add a long branch stub if necessary.
+
+ However, we have to be careful when generating PIC code on the
+ SOM port to ensure that the sequence does not transfer to an
+ import stub for the target function as this could clobber the
+ return value saved at SP-24. This would also apply to the
+ 32-bit linux port if the multi-space model is implemented. */
+ if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && !(flag_pic && TREE_PUBLIC (function))
+ && (TARGET_GAS || last_address < 262132))
+ || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && ((targetm.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) != NULL
+ /* The GNU 64-bit linker has rather poor stub management.
+ So, we use a long branch from thunks that aren't in
+ the same section as the target function. */
+ && ((!TARGET_64BIT
+ && (DECL_SECTION_NAME (thunk_fndecl)
+ != DECL_SECTION_NAME (function)))
+ || ((DECL_SECTION_NAME (thunk_fndecl)
+ == DECL_SECTION_NAME (function))
+ && last_address < 262132)))
+ || (targetm.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) == NULL
+ && DECL_SECTION_NAME (function) == NULL
+ && last_address < 262132)
+ || (!targetm.have_named_sections && last_address < 262132))))
+ {
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("b %0", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 8;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 12;
+ }
+ }
+ else if (TARGET_64BIT)
+ {
+ /* We only have one call-clobbered scratch register, so we can't
+ make use of the delay slot if delta doesn't fit in 14 bits. */
+ if (!val_14)
+ {
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ }
+
+ output_asm_insn ("b,l .+8,%%r1", xoperands);
+
+ if (TARGET_GAS)
+ {
+ output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
+ }
+ else
+ {
+ xoperands[3] = GEN_INT (val_14 ?
8 : 16);
+ output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
+ }
+
+ if (val_14)
+ {
+ output_asm_insn ("bv %%r0(%%r1)", xoperands);
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 20;
+ }
+ else
+ {
+ output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
+ nbytes += 24;
+ }
+ }
+ else if (TARGET_PORTABLE_RUNTIME)
+ {
+ output_asm_insn ("ldil L'%0,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
+
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("bv %%r0(%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 16;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 20;
+ }
+ }
+ else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
+ {
+ /* The function is accessible from outside this module. The only
+ way to avoid an import stub between the thunk and function is to
+ call the function directly with an indirect sequence similar to
+ that used by $$dyncall. This is possible because $$dyncall acts
+ as the import stub in an indirect call. */
+ ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
+ xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
+ output_asm_insn ("addil LT'%3,%%r19", xoperands);
+ output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+ output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
+ output_asm_insn ("depi 0,31,2,%%r22", xoperands);
+ output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+
+ if (!val_14)
+ {
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+ nbytes += 4;
+ }
+
+ if (TARGET_PA_20)
+ {
+ output_asm_insn ("bve (%%r22)", xoperands);
+ nbytes += 36;
+ }
+ else if (TARGET_NO_SPACE_REGS)
+ {
+ output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
+ nbytes += 36;
+ }
+ else
+ {
+ output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
+ output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
+ output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
+ nbytes += 44;
+ }
+
+ if (val_14)
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ else
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ }
+ else if (flag_pic)
+ {
+ output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+
+ if (TARGET_SOM || !TARGET_GAS)
+ {
+ output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
+ }
+
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("bv %%r0(%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 20;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 24;
+ }
+ }
+ else
+ {
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("ldil L'%0,%%r22", xoperands);
+ output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 12;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 16;
+ }
+ }
+
+ fprintf (file, "\t.EXIT\n\t.PROCEND\n");
+
+ if (TARGET_SOM && TARGET_GAS)
+ {
+ /* We're done with this subspace except possibly for some additional
+ debug information.
Forget that we are in this subspace to ensure
+ that the next function is output in its own subspace. */
+ in_section = NULL;
+ cfun->machine->in_nsubspa = 2;
+ }
+
+ if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
+ {
+ switch_to_section (data_section);
+ output_asm_insn (".align 4", xoperands);
+ ASM_OUTPUT_LABEL (file, label);
+ output_asm_insn (".word P'%0", xoperands);
+ }
+
+ current_thunk_number++;
+ nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
+ & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
+ last_address += nbytes;
+ if (old_last_address > last_address)
+ last_address = UINT_MAX;
+ update_total_code_bytes (nbytes);
+}
+
+/* Only direct calls to static functions are allowed to be sibling (tail)
+ call optimized.
+
+ This restriction is necessary because some linker generated stubs will
+ store return pointers into rp' in some cases which might clobber a
+ live value already in rp'.
+
+ In a sibcall the current function and the target function share stack
+ space. Thus if the path to the current function and the path to the
+ target function save a value in rp', they save the value into the
+ same stack slot, which has undesirable consequences.
+
+ Because of the deferred binding nature of shared libraries any function
+ with external scope could be in a different load module and thus require
+ rp' to be saved when calling that function. So sibcall optimizations
+ can only be safe for static functions.
+
+ Note that GCC never needs return value relocations, so we don't have to
+ worry about static calls with return value relocations (which require
+ saving rp').
+
+ It is safe to perform a sibcall optimization when the target function
+ will never return. */
+static bool
+pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ if (TARGET_PORTABLE_RUNTIME)
+ return false;
+
+ /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
+ single subspace mode and the call is not indirect. As far as I know,
+ there is no operating system support for the multiple subspace mode.
+ It might be possible to support indirect calls if we didn't use
+ $$dyncall (see the indirect sequence generated in output_call). */
+ if (TARGET_ELF32)
+ return (decl != NULL_TREE);
+
+ /* Sibcalls are not ok because the arg pointer register is not a fixed
+ register. This prevents the sibcall optimization from occurring. In
+ addition, there are problems with stub placement using GNU ld. This
+ is because a normal sibcall branch uses a 17-bit relocation while
+ a regular call branch uses a 22-bit relocation. As a result, more
+ care needs to be taken in the placement of long-branch stubs. */
+ if (TARGET_64BIT)
+ return false;
+
+ /* Sibcalls are only ok within a translation unit. */
+ return (decl && !TREE_PUBLIC (decl));
+}
+
+/* ??? Addition is not commutative on the PA due to the weird implicit
+ space register selection rules for memory addresses. Therefore, we
+ don't consider a + b == b + a, as this might be inside a MEM. */
+static bool
+pa_commutative_p (const_rtx x, int outer_code)
+{
+ return (COMMUTATIVE_P (x)
+ && (TARGET_NO_SPACE_REGS
+ || (outer_code != UNKNOWN && outer_code != MEM)
+ || GET_CODE (x) != PLUS));
+}
+
+/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
+ use in fmpyadd instructions. */
+int
+fmpyaddoperands (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ /* Must be a floating point mode. */
+ if (mode != SFmode && mode != DFmode)
+ return 0;
+
+ /* All modes must be the same. */
+ if (!
(mode == GET_MODE (operands[1]) + && mode == GET_MODE (operands[2]) + && mode == GET_MODE (operands[3]) + && mode == GET_MODE (operands[4]) + && mode == GET_MODE (operands[5]))) + return 0; + + /* All operands must be registers. */ + if (! (GET_CODE (operands[1]) == REG + && GET_CODE (operands[2]) == REG + && GET_CODE (operands[3]) == REG + && GET_CODE (operands[4]) == REG + && GET_CODE (operands[5]) == REG)) + return 0; + + /* Only 2 real operands to the addition. One of the input operands must + be the same as the output operand. */ + if (! rtx_equal_p (operands[3], operands[4]) + && ! rtx_equal_p (operands[3], operands[5])) + return 0; + + /* Inout operand of add cannot conflict with any operands from multiply. */ + if (rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2])) + return 0; + + /* multiply cannot feed into addition operands. */ + if (rtx_equal_p (operands[4], operands[0]) + || rtx_equal_p (operands[5], operands[0])) + return 0; + + /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ + if (mode == SFmode + && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) + return 0; + + /* Passed. Operands are suitable for fmpyadd. */ + return 1; +} + +#if !defined(USE_COLLECT2) +static void +pa_asm_out_constructor (rtx symbol, int priority) +{ + if (!function_label_operand (symbol, VOIDmode)) + hppa_encode_label (symbol); + +#ifdef CTORS_SECTION_ASM_OP + default_ctor_section_asm_out_constructor (symbol, priority); +#else +# ifdef TARGET_ASM_NAMED_SECTION + default_named_section_asm_out_constructor (symbol, priority); +# else + default_stabs_asm_out_constructor (symbol, priority); +# endif +#endif +} + +static void +pa_asm_out_destructor (rtx symbol, int priority) +{ + if (!function_label_operand (symbol, VOIDmode)) + hppa_encode_label (symbol); + +#ifdef DTORS_SECTION_ASM_OP + default_dtor_section_asm_out_destructor (symbol, priority); +#else +# ifdef TARGET_ASM_NAMED_SECTION + default_named_section_asm_out_destructor (symbol, priority); +# else + default_stabs_asm_out_destructor (symbol, priority); +# endif +#endif +} +#endif + +/* This function places uninitialized global data in the bss section. + The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this + function on the SOM port to prevent uninitialized global data from + being placed in the data section. */ + +void +pa_asm_output_aligned_bss (FILE *stream, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + switch_to_section (bss_section); + fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); + +#ifdef ASM_OUTPUT_TYPE_DIRECTIVE + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); +#endif + +#ifdef ASM_OUTPUT_SIZE_DIRECTIVE + ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); +#endif + + fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); + ASM_OUTPUT_LABEL (stream, name); + fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); +} + +/* Both the HP and GNU assemblers under HP-UX provide a .comm directive + that doesn't allow the alignment of global common storage to be directly + specified. 
The SOM linker aligns common storage based on the rounded
+ value of the NUM_BYTES parameter in the .comm directive. It's not
+ possible to use the .align directive as it doesn't affect the alignment
+ of the label associated with a .comm directive. */
+
+void
+pa_asm_output_aligned_common (FILE *stream,
+ const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ unsigned int max_common_align;
+
+ max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
+ if (align > max_common_align)
+ {
+ warning (0, "alignment (%u) for %s exceeds maximum alignment "
+ "for global common data. Using %u",
+ align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
+ align = max_common_align;
+ }
+
+ switch_to_section (bss_section);
+
+ assemble_name (stream, name);
+ fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
+ MAX (size, align / BITS_PER_UNIT));
+}
+
+/* We can't use .comm for local common storage as the SOM linker effectively
+ treats the symbol as universal and uses the same storage for local symbols
+ with the same name in different object files. The .block directive
+ reserves an uninitialized block of storage. However, it's not common
+ storage. Fortunately, GCC never requests common storage with the same
+ name in any given translation unit. */
+
+void
+pa_asm_output_aligned_local (FILE *stream,
+ const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ switch_to_section (bss_section);
+ fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
+
+#ifdef LOCAL_ASM_OP
+ fprintf (stream, "%s", LOCAL_ASM_OP);
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+#endif
+
+ ASM_OUTPUT_LABEL (stream, name);
+ fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+}
+
+/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
+ use in fmpysub instructions. */
+int
+fmpysuboperands (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ /* Must be a floating point mode. */
+ if (mode != SFmode && mode != DFmode)
+ return 0;
+
+ /* All modes must be the same. */
+ if (! (mode == GET_MODE (operands[1])
+ && mode == GET_MODE (operands[2])
+ && mode == GET_MODE (operands[3])
+ && mode == GET_MODE (operands[4])
+ && mode == GET_MODE (operands[5])))
+ return 0;
+
+ /* All operands must be registers. */
+ if (! (GET_CODE (operands[1]) == REG
+ && GET_CODE (operands[2]) == REG
+ && GET_CODE (operands[3]) == REG
+ && GET_CODE (operands[4]) == REG
+ && GET_CODE (operands[5]) == REG))
+ return 0;
+
+ /* Only 2 real operands to the subtraction. Subtraction is not a commutative
+ operation, so operands[4] must be the same as operands[3]. */
+ if (! rtx_equal_p (operands[3], operands[4]))
+ return 0;
+
+ /* multiply cannot feed into subtraction. */
+ if (rtx_equal_p (operands[5], operands[0]))
+ return 0;
+
+ /* Inout operand of sub cannot conflict with any operands from multiply. */
+ if (rtx_equal_p (operands[3], operands[0])
+ || rtx_equal_p (operands[3], operands[1])
+ || rtx_equal_p (operands[3], operands[2]))
+ return 0;
+
+ /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
+ if (mode == SFmode
+ && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
+ return 0;
+
+ /* Passed.
Operands are suitable for fmpysub. */ + return 1; +} + +/* Return 1 if the given constant is 2, 4, or 8. These are the valid + constants for shadd instructions. */ +int +shadd_constant_p (int val) +{ + if (val == 2 || val == 4 || val == 8) + return 1; + else + return 0; +} + +/* Return 1 if OP is valid as a base or index register in a + REG+REG address. */ + +int +borx_reg_operand (rtx op, enum machine_mode mode) +{ + if (GET_CODE (op) != REG) + return 0; + + /* We must reject virtual registers as the only expressions that + can be instantiated are REG and REG+CONST. */ + if (op == virtual_incoming_args_rtx + || op == virtual_stack_vars_rtx + || op == virtual_stack_dynamic_rtx + || op == virtual_outgoing_args_rtx + || op == virtual_cfa_rtx) + return 0; + + /* While it's always safe to index off the frame pointer, it's not + profitable to do so when the frame pointer is being eliminated. */ + if (!reload_completed + && flag_omit_frame_pointer + && !cfun->calls_alloca + && op == frame_pointer_rtx) + return 0; + + return register_operand (op, mode); +} + +/* Return 1 if this operand is anything other than a hard register. */ + +int +non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER); +} + +/* Return TRUE if INSN branches forward. */ + +static bool +forward_branch_p (rtx insn) +{ + rtx lab = JUMP_LABEL (insn); + + /* The INSN must have a jump label. */ + gcc_assert (lab != NULL_RTX); + + if (INSN_ADDRESSES_SET_P ()) + return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn)); + + while (insn) + { + if (insn == lab) + return true; + else + insn = NEXT_INSN (insn); + } + + return false; +} + +/* Return 1 if OP is an equality comparison, else return 0. */ +int +eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return (GET_CODE (op) == EQ || GET_CODE (op) == NE); +} + +/* Return 1 if INSN is in the delay slot of a call instruction. */ +int +jump_in_call_delay (rtx insn) +{ + + if (GET_CODE (insn) != JUMP_INSN) + return 0; + + if (PREV_INSN (insn) + && PREV_INSN (PREV_INSN (insn)) + && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN) + { + rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn))); + + return (GET_CODE (PATTERN (test_insn)) == SEQUENCE + && XVECEXP (PATTERN (test_insn), 0, 1) == insn); + + } + else + return 0; +} + +/* Output an unconditional move and branch insn. */ + +const char * +output_parallel_movb (rtx *operands, rtx insn) +{ + int length = get_attr_length (insn); + + /* These are the cases in which we win. */ + if (length == 4) + return "mov%I1b,tr %1,%0,%2"; + + /* None of the following cases win, but they don't lose either. */ + if (length == 8) + { + if (dbr_sequence_length () == 0) + { + /* Nothing in the delay slot, fake it by putting the combined + insn (the copy or add) in the delay slot of a bl. */ + if (GET_CODE (operands[1]) == CONST_INT) + return "b %2\n\tldi %1,%0"; + else + return "b %2\n\tcopy %1,%0"; + } + else + { + /* Something in the delay slot, but we've got a long branch. */ + if (GET_CODE (operands[1]) == CONST_INT) + return "ldi %1,%0\n\tb %2"; + else + return "copy %1,%0\n\tb %2"; + } + } + + if (GET_CODE (operands[1]) == CONST_INT) + output_asm_insn ("ldi %1,%0", operands); + else + output_asm_insn ("copy %1,%0", operands); + return output_lbranch (operands[2], insn, 1); +} + +/* Output an unconditional add and branch insn. 
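+ When the branch target is in reach (insn length of 4), the add and
+ the branch are emitted as a single add-and-branch insn; otherwise we
+ fall back to separate insns, or to output_lbranch for a long branch.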
*/
+
+const char *
+output_parallel_addb (rtx *operands, rtx insn)
+{
+ int length = get_attr_length (insn);
+
+ /* To make life easy we want operand0 to be the shared input/output
+ operand and operand1 to be the readonly operand. */
+ if (operands[0] == operands[1])
+ operands[1] = operands[2];
+
+ /* These are the cases in which we win. */
+ if (length == 4)
+ return "add%I1b,tr %1,%0,%3";
+
+ /* None of the following cases win, but they don't lose either. */
+ if (length == 8)
+ {
+ if (dbr_sequence_length () == 0)
+ /* Nothing in the delay slot, fake it by putting the combined
+ insn (the copy or add) in the delay slot of a bl. */
+ return "b %3\n\tadd%I1 %1,%0,%0";
+ else
+ /* Something in the delay slot, but we've got a long branch. */
+ return "add%I1 %1,%0,%0\n\tb %3";
+ }
+
+ output_asm_insn ("add%I1 %1,%0,%0", operands);
+ return output_lbranch (operands[3], insn, 1);
+}
+
+/* Return nonzero if INSN (a jump insn) immediately follows a call
+ to a named function. This is used to avoid filling the delay slot
+ of the jump since it can usually be eliminated by modifying RP in
+ the delay slot of the call. */
+
+int
+following_call (rtx insn)
+{
+ if (! TARGET_JUMP_IN_DELAY)
+ return 0;
+
+ /* Find the previous real insn, skipping NOTEs. */
+ insn = PREV_INSN (insn);
+ while (insn && GET_CODE (insn) == NOTE)
+ insn = PREV_INSN (insn);
+
+ /* Check for CALL_INSNs and millicode calls. */
+ if (insn
+ && ((GET_CODE (insn) == CALL_INSN
+ && get_attr_type (insn) != TYPE_DYNCALL)
+ || (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) != SEQUENCE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && get_attr_type (insn) == TYPE_MILLI)))
+ return 1;
+
+ return 0;
+}
+
+/* We use this hook to perform a PA specific optimization which is difficult
+ to do in earlier passes.
+
+ We want the delay slots of branches within jump tables to be filled.
+ None of the compiler passes at the moment even has the notion that a
+ PA jump table doesn't contain addresses, but instead contains actual
+ instructions!
+
+ Because we actually jump into the table, the addresses of each entry
+ must stay constant in relation to the beginning of the table (which
+ itself must stay constant relative to the instruction to jump into
+ it). I don't believe we can guarantee earlier passes of the compiler
+ will adhere to those rules.
+
+ So, late in the compilation process we find all the jump tables, and
+ expand them into real code -- e.g. each entry in the jump table vector
+ will get an appropriate label followed by a jump to the final target.
+
+ Reorg and the final jump pass can then optimize these branches and
+ fill their delay slots. We end up with smaller, more efficient code.
+
+ The jump instructions within the table are special; we must be able
+ to identify them during assembly output (if the jumps don't get filled
+ we need to emit a nop rather than nullifying the delay slot). We
+ identify jumps in switch tables by using insns with the attribute
+ type TYPE_BTABLE_BRANCH.
+
+ We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
+ insns. This serves two purposes: first, it prevents jump.c from
+ noticing that the last N entries in the table jump to the instruction
+ immediately after the table and deleting the jumps. Second, those
+ insns mark where we should emit .begin_brtab and .end_brtab directives
+ when using GAS (allows for better link time optimizations).
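+
+ As a rough sketch (label names are illustrative only), a two-entry
+ ADDR_VEC with targets T0 and T1 becomes
+
+	begin_brtab marker
+	L$0:	b,n T0
+	L$1:	b,n T1
+	end_brtab marker
+
+ with a barrier emitted after each jump; reorg may later fill the
+ delay slots of these branches.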
*/
+
+static void
+pa_reorg (void)
+{
+ rtx insn;
+
+ remove_useless_addtr_insns (1);
+
+ if (pa_cpu < PROCESSOR_8000)
+ pa_combine_instructions ();
+
+ /* This is fairly cheap, so always run it if optimizing. */
+ if (optimize > 0 && !TARGET_BIG_SWITCH)
+ {
+ /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pattern, tmp, location, label;
+ unsigned int length, i;
+
+ /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
+ if (GET_CODE (insn) != JUMP_INSN
+ || (GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
+ continue;
+
+ /* Emit marker for the beginning of the branch table. */
+ emit_insn_before (gen_begin_brtab (), insn);
+
+ pattern = PATTERN (insn);
+ location = PREV_INSN (insn);
+ length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
+
+ for (i = 0; i < length; i++)
+ {
+ /* Emit a label before each jump to keep jump.c from
+ removing this code. */
+ tmp = gen_label_rtx ();
+ LABEL_NUSES (tmp) = 1;
+ emit_label_after (tmp, location);
+ location = NEXT_INSN (location);
+
+ if (GET_CODE (pattern) == ADDR_VEC)
+ label = XEXP (XVECEXP (pattern, 0, i), 0);
+ else
+ label = XEXP (XVECEXP (pattern, 1, i), 0);
+
+ tmp = gen_short_jump (label);
+
+ /* Emit the jump itself. */
+ tmp = emit_jump_insn_after (tmp, location);
+ JUMP_LABEL (tmp) = label;
+ LABEL_NUSES (label)++;
+ location = NEXT_INSN (location);
+
+ /* Emit a BARRIER after the jump. */
+ emit_barrier_after (location);
+ location = NEXT_INSN (location);
+ }
+
+ /* Emit marker for the end of the branch table. */
+ emit_insn_before (gen_end_brtab (), location);
+ location = NEXT_INSN (location);
+ emit_barrier_after (location);
+
+ /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
+ delete_insn (insn);
+ }
+ }
+ else
+ {
+ /* Still need brtab marker insns. FIXME: the presence of these
+ markers disables output of the branch table to readonly memory,
+ and any alignment directives that might be needed. Possibly,
+ the begin_brtab insn should be output before the label for the
+ table. This doesn't matter at the moment since the tables are
+ always output in the text section. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ /* Find an ADDR_VEC insn. */
+ if (GET_CODE (insn) != JUMP_INSN
+ || (GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
+ continue;
+
+ /* Now generate markers for the beginning and end of the
+ branch table. */
+ emit_insn_before (gen_begin_brtab (), insn);
+ emit_insn_after (gen_end_brtab (), insn);
+ }
+ }
+}
+
+/* The PA has a number of odd instructions which can perform multiple
+ tasks at once. On first generation PA machines (PA1.0 and PA1.1)
+ it may be profitable to combine two instructions into one instruction
+ with two outputs. It's not profitable on PA2.0 machines because the
+ two outputs would take two slots in the reorder buffers.
+
+ This routine finds instructions which can be combined and combines
+ them. We only support some of the potential combinations, and we
+ only try common ways to find suitable instructions.
+
+ * addb can add two registers or a register and a small integer
+ and jump to a nearby (+-8k) location. Normally the jump to the
+ nearby location is conditional on the result of the add, but by
+ using the "true" condition we can make the jump unconditional.
+ Thus addb can perform two independent operations in one insn.
+
+ * movb is similar to addb in that it can perform a reg->reg
+ or small immediate->reg copy and jump to a nearby (+-8k) location.
+
+ * fmpyadd and fmpysub can perform an FP multiply and either an
+ FP add or FP sub if the operands of the multiply and add/sub are
+ independent (there are other minor restrictions). Note both
+ the fmpy and fadd/fsub can in theory move to better spots according
+ to data dependencies, but for now we require the fmpy stay at a
+ fixed location.
+
+ * Many of the memory operations can perform pre & post updates
+ of index registers. GCC's pre/post increment/decrement addressing
+ is far too simple to take advantage of all the possibilities. This
+ pass may not be suitable since those insns may not be independent.
+
+ * comclr can compare two ints or an int and a register, nullify
+ the following instruction and zero some other register. This
+ is more difficult to use as it's harder to find an insn which
+ will generate a comclr than finding something like an unconditional
+ branch. (conditional moves & long branches create comclr insns).
+
+ * Most arithmetic operations can conditionally skip the next
+ instruction. They can be viewed as "perform this operation
+ and conditionally jump to this nearby location" (where nearby
+ is an insn away). These are difficult to use due to the
+ branch length restrictions. */
+
+static void
+pa_combine_instructions (void)
+{
+ rtx anchor, new_rtx;
+
+ /* This can get expensive since the basic algorithm is on the
+ order of O(n^2) (or worse). Only do it for -O2 or higher
+ levels of optimization. */
+ if (optimize < 2)
+ return;
+
+ /* Walk down the list of insns looking for "anchor" insns which
+ may be combined with "floating" insns. As the name implies,
+ "anchor" instructions don't move, while "floating" insns may
+ move around. */
+ new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
+ new_rtx = make_insn_raw (new_rtx);
+
+ for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
+ {
+ enum attr_pa_combine_type anchor_attr;
+ enum attr_pa_combine_type floater_attr;
+
+ /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
+ Also ignore any special USE insns. */
+ if ((GET_CODE (anchor) != INSN
+ && GET_CODE (anchor) != JUMP_INSN
+ && GET_CODE (anchor) != CALL_INSN)
+ || GET_CODE (PATTERN (anchor)) == USE
+ || GET_CODE (PATTERN (anchor)) == CLOBBER
+ || GET_CODE (PATTERN (anchor)) == ADDR_VEC
+ || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
+ continue;
+
+ anchor_attr = get_attr_pa_combine_type (anchor);
+ /* See if anchor is an insn suitable for combination. */
+ if (anchor_attr == PA_COMBINE_TYPE_FMPY
+ || anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+ && ! forward_branch_p (anchor)))
+ {
+ rtx floater;
+
+ for (floater = PREV_INSN (anchor);
+ floater;
+ floater = PREV_INSN (floater))
+ {
+ if (GET_CODE (floater) == NOTE
+ || (GET_CODE (floater) == INSN
+ && (GET_CODE (PATTERN (floater)) == USE
+ || GET_CODE (PATTERN (floater)) == CLOBBER)))
+ continue;
+
+ /* Anything except a regular INSN will stop our search. */
+ if (GET_CODE (floater) != INSN
+ || GET_CODE (PATTERN (floater)) == ADDR_VEC
+ || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+ {
+ floater = NULL_RTX;
+ break;
+ }
+
+ /* See if FLOATER is suitable for combination with the
+ anchor.
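+ An FMPY anchor needs an FADDSUB floater and vice versa, while an
+ unconditional branch anchor needs an ADDMOVE floater.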
*/ + floater_attr = get_attr_pa_combine_type (floater); + if ((anchor_attr == PA_COMBINE_TYPE_FMPY + && floater_attr == PA_COMBINE_TYPE_FADDSUB) + || (anchor_attr == PA_COMBINE_TYPE_FADDSUB + && floater_attr == PA_COMBINE_TYPE_FMPY)) + { + /* If ANCHOR and FLOATER can be combined, then we're + done with this pass. */ + if (pa_can_combine_p (new_rtx, anchor, floater, 0, + SET_DEST (PATTERN (floater)), + XEXP (SET_SRC (PATTERN (floater)), 0), + XEXP (SET_SRC (PATTERN (floater)), 1))) + break; + } + + else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH + && floater_attr == PA_COMBINE_TYPE_ADDMOVE) + { + if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) + { + if (pa_can_combine_p (new_rtx, anchor, floater, 0, + SET_DEST (PATTERN (floater)), + XEXP (SET_SRC (PATTERN (floater)), 0), + XEXP (SET_SRC (PATTERN (floater)), 1))) + break; + } + else + { + if (pa_can_combine_p (new_rtx, anchor, floater, 0, + SET_DEST (PATTERN (floater)), + SET_SRC (PATTERN (floater)), + SET_SRC (PATTERN (floater)))) + break; + } + } + } + + /* If we didn't find anything on the backwards scan try forwards. */ + if (!floater + && (anchor_attr == PA_COMBINE_TYPE_FMPY + || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) + { + for (floater = anchor; floater; floater = NEXT_INSN (floater)) + { + if (GET_CODE (floater) == NOTE + || (GET_CODE (floater) == INSN + && (GET_CODE (PATTERN (floater)) == USE + || GET_CODE (PATTERN (floater)) == CLOBBER))) + + continue; + + /* Anything except a regular INSN will stop our search. */ + if (GET_CODE (floater) != INSN + || GET_CODE (PATTERN (floater)) == ADDR_VEC + || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) + { + floater = NULL_RTX; + break; + } + + /* See if FLOATER is suitable for combination with the + anchor. */ + floater_attr = get_attr_pa_combine_type (floater); + if ((anchor_attr == PA_COMBINE_TYPE_FMPY + && floater_attr == PA_COMBINE_TYPE_FADDSUB) + || (anchor_attr == PA_COMBINE_TYPE_FADDSUB + && floater_attr == PA_COMBINE_TYPE_FMPY)) + { + /* If ANCHOR and FLOATER can be combined, then we're + done with this pass. */ + if (pa_can_combine_p (new_rtx, anchor, floater, 1, + SET_DEST (PATTERN (floater)), + XEXP (SET_SRC (PATTERN (floater)), + 0), + XEXP (SET_SRC (PATTERN (floater)), + 1))) + break; + } + } + } + + /* FLOATER will be nonzero if we found a suitable floating + insn for combination with ANCHOR. */ + if (floater + && (anchor_attr == PA_COMBINE_TYPE_FADDSUB + || anchor_attr == PA_COMBINE_TYPE_FMPY)) + { + /* Emit the new instruction and delete the old anchor. */ + emit_insn_before (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, PATTERN (anchor), + PATTERN (floater))), + anchor); + + SET_INSN_DELETED (anchor); + + /* Emit a special USE insn for FLOATER, then delete + the floating insn. */ + emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); + delete_insn (floater); + + continue; + } + else if (floater + && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) + { + rtx temp; + /* Emit the new_jump instruction and delete the old anchor. */ + temp + = emit_jump_insn_before (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, PATTERN (anchor), + PATTERN (floater))), + anchor); + + JUMP_LABEL (temp) = JUMP_LABEL (anchor); + SET_INSN_DELETED (anchor); + + /* Emit a special USE insn for FLOATER, then delete + the floating insn. 
*/
+ emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
+ delete_insn (floater);
+ continue;
+ }
+ }
+ }
+}
+
+static int
+pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
+ rtx src1, rtx src2)
+{
+ int insn_code_number;
+ rtx start, end;
+
+ /* Create a PARALLEL with the patterns of ANCHOR and
+ FLOATER, try to recognize it, then test constraints
+ for the resulting pattern.
+
+ If the pattern doesn't match or the constraints
+ aren't met keep searching for a suitable floater
+ insn. */
+ XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
+ XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
+ INSN_CODE (new_rtx) = -1;
+ insn_code_number = recog_memoized (new_rtx);
+ if (insn_code_number < 0
+ || (extract_insn (new_rtx), ! constrain_operands (1)))
+ return 0;
+
+ if (reversed)
+ {
+ start = anchor;
+ end = floater;
+ }
+ else
+ {
+ start = floater;
+ end = anchor;
+ }
+
+ /* There are up to three operands to consider: one
+ output and two inputs.
+
+ The output must not be used between FLOATER & ANCHOR
+ exclusive. The inputs must not be set between
+ FLOATER and ANCHOR exclusive. */
+
+ if (reg_used_between_p (dest, start, end))
+ return 0;
+
+ if (reg_set_between_p (src1, start, end))
+ return 0;
+
+ if (reg_set_between_p (src2, start, end))
+ return 0;
+
+ /* If we get here, then everything is good. */
+ return 1;
+}
+
+/* Return nonzero if references for INSN are delayed.
+
+ Millicode insns are actually function calls with some special
+ constraints on arguments and register usage.
+
+ Millicode calls always expect their arguments in the integer argument
+ registers, and always return their result in %r29 (ret1). They
+ are expected to clobber their arguments, %r1, %r29, and the return
+ pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
+
+ This function tells reorg that the references to arguments and
+ millicode calls do not appear to happen until after the millicode call.
+ This allows reorg to put insns which set the argument registers into the
+ delay slot of the millicode call -- thus they act more like traditional
+ CALL_INSNs.
+
+ Note we cannot consider side effects of the insn to be delayed because
+ the branch and link insn will clobber the return pointer. If we happened
+ to use the return pointer in the delay slot of the call, then we lose.
+
+ get_attr_type will try to recognize the given insn, so make sure to
+ filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+ in particular. */
+int
+insn_refs_are_delayed (rtx insn)
+{
+ return ((GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) != SEQUENCE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && get_attr_type (insn) == TYPE_MILLI));
+}
+
+/* Promote the return value, but not the arguments. */
+
+static enum machine_mode
+pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return)
+{
+ if (for_return == 0)
+ return mode;
+ return promote_mode (type, mode, punsignedp);
+}
+
+/* On the HP-PA the value is found in register(s) 28(-29), unless
+ the mode is SF or DF. Then the value is returned in fr4 (32).
+
+ This must perform the same promotions as PROMOTE_MODE, else promoting
+ return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
+
+ Small structures must be returned in a PARALLEL on PA64 in order
+ to match the HP Compiler ABI.
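+ As a rough illustration, a 16-byte struct is returned on PA64 as
+ (parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
+ (expr_list (reg:DI 29) (const_int 8))]).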
*/
+
+static rtx
+pa_function_value (const_tree valtype,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode valmode;
+
+ if (AGGREGATE_TYPE_P (valtype)
+ || TREE_CODE (valtype) == COMPLEX_TYPE
+ || TREE_CODE (valtype) == VECTOR_TYPE)
+ {
+ if (TARGET_64BIT)
+ {
+ /* Aggregates with a size less than or equal to 128 bits are
+ returned in GR 28(-29). They are left justified. The pad
+ bits are undefined. Larger aggregates are returned in
+ memory. */
+ rtx loc[2];
+ int i, offset = 0;
+ int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
+
+ for (i = 0; i < ub; i++)
+ {
+ loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, 28 + i),
+ GEN_INT (offset));
+ offset += 8;
+ }
+
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
+ }
+ else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
+ {
+ /* Aggregates 5 to 8 bytes in size are returned in general
+ registers r28-r29 in the same manner as other non
+ floating-point objects. The data is right-justified and
+ zero-extended to 64 bits. This is opposite to the normal
+ justification used on big endian targets and requires
+ special treatment. */
+ rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, 28), const0_rtx);
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
+ }
+ }
+
+ if ((INTEGRAL_TYPE_P (valtype)
+ && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
+ || POINTER_TYPE_P (valtype))
+ valmode = word_mode;
+ else
+ valmode = TYPE_MODE (valtype);
+
+ if (TREE_CODE (valtype) == REAL_TYPE
+ && !AGGREGATE_TYPE_P (valtype)
+ && TYPE_MODE (valtype) != TFmode
+ && !TARGET_SOFT_FLOAT)
+ return gen_rtx_REG (valmode, 32);
+
+ return gen_rtx_REG (valmode, 28);
+}
+
+/* Implement the TARGET_LIBCALL_VALUE hook. */
+
+static rtx
+pa_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ if (! TARGET_SOFT_FLOAT
+ && (mode == SFmode || mode == DFmode))
+ return gen_rtx_REG (mode, 32);
+ else
+ return gen_rtx_REG (mode, 28);
+}
+
+/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
+
+static bool
+pa_function_value_regno_p (const unsigned int regno)
+{
+ if (regno == 28
+ || (! TARGET_SOFT_FLOAT && regno == 32))
+ return true;
+
+ return false;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+pa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int arg_size = FUNCTION_ARG_SIZE (mode, type);
+
+ cum->nargs_prototype--;
+ cum->words += (arg_size
+ + ((cum->words & 01)
+ && type != NULL_TREE
+ && arg_size > 1));
+}
+
+/* Return the location of a parameter that is passed in a register or NULL
+ if the parameter has any component that is passed in memory.
+
+ This is new code and will be pushed into the net sources after
+ further testing.
+
+ ??? We might want to restructure this so that it looks more like other
+ ports. */
+static rtx
+pa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int max_arg_words = (TARGET_64BIT ? 8 : 4);
+ int alignment = 0;
+ int arg_size;
+ int fpr_reg_base;
+ int gpr_reg_base;
+ rtx retval;
+
+ if (mode == VOIDmode)
+ return NULL_RTX;
+
+ arg_size = FUNCTION_ARG_SIZE (mode, type);
+
+ /* If this arg would be passed partially or totally on the stack, then
+ this routine should return zero.
pa_arg_partial_bytes will + handle arguments which are split between regs and stack slots if + the ABI mandates split arguments. */ + if (!TARGET_64BIT) + { + /* The 32-bit ABI does not split arguments. */ + if (cum->words + arg_size > max_arg_words) + return NULL_RTX; + } + else + { + if (arg_size > 1) + alignment = cum->words & 1; + if (cum->words + alignment >= max_arg_words) + return NULL_RTX; + } + + /* The 32bit ABIs and the 64bit ABIs are rather different, + particularly in their handling of FP registers. We might + be able to cleverly share code between them, but I'm not + going to bother in the hope that splitting them up results + in code that is more easily understood. */ + + if (TARGET_64BIT) + { + /* Advance the base registers to their current locations. + + Remember, gprs grow towards smaller register numbers while + fprs grow to higher register numbers. Also remember that + although FP regs are 32-bit addressable, we pretend that + the registers are 64-bits wide. */ + gpr_reg_base = 26 - cum->words; + fpr_reg_base = 32 + cum->words; + + /* Arguments wider than one word and small aggregates need special + treatment. */ + if (arg_size > 1 + || mode == BLKmode + || (type && (AGGREGATE_TYPE_P (type) + || TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE))) + { + /* Double-extended precision (80-bit), quad-precision (128-bit) + and aggregates including complex numbers are aligned on + 128-bit boundaries. The first eight 64-bit argument slots + are associated one-to-one, with general registers r26 + through r19, and also with floating-point registers fr4 + through fr11. Arguments larger than one word are always + passed in general registers. + + Using a PARALLEL with a word mode register results in left + justified data on a big-endian target. */ + + rtx loc[8]; + int i, offset = 0, ub = arg_size; + + /* Align the base register. */ + gpr_reg_base -= alignment; + + ub = MIN (ub, max_arg_words - cum->words - alignment); + for (i = 0; i < ub; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, gpr_reg_base), + GEN_INT (offset)); + gpr_reg_base -= 1; + offset += 8; + } + + return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc)); + } + } + else + { + /* If the argument is larger than a word, then we know precisely + which registers we must use. */ + if (arg_size > 1) + { + if (cum->words) + { + gpr_reg_base = 23; + fpr_reg_base = 38; + } + else + { + gpr_reg_base = 25; + fpr_reg_base = 34; + } + + /* Structures 5 to 8 bytes in size are passed in the general + registers in the same manner as other non floating-point + objects. The data is right-justified and zero-extended + to 64 bits. This is opposite to the normal justification + used on big endian targets and requires special treatment. + We now define BLOCK_REG_PADDING to pad these objects. + Aggregates, complex and vector types are passed in the same + manner as structures. */ + if (mode == BLKmode + || (type && (AGGREGATE_TYPE_P (type) + || TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE))) + { + rtx loc = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, gpr_reg_base), + const0_rtx); + return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc)); + } + } + else + { + /* We have a single word (32 bits). A simple computation + will get us the register #s we need. */ + gpr_reg_base = 26 - cum->words; + fpr_reg_base = 32 + 2 * cum->words; + } + } + + /* Determine if the argument needs to be passed in both general and + floating point registers. 
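+ (As the tests below spell out, this happens for outgoing scalar
+ floats when the callee is unprototyped, and for outgoing floats in
+ indirect calls under the 32-bit ABI with the HP assembler.)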
*/
+ if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
+ /* If we are doing soft-float with portable runtime, then there
+ is no need to worry about FP regs. */
+ && !TARGET_SOFT_FLOAT
+ /* The parameter must be some kind of scalar float, else we just
+ pass it in integer registers. */
+ && GET_MODE_CLASS (mode) == MODE_FLOAT
+ /* The target function must not have a prototype. */
+ && cum->nargs_prototype <= 0
+ /* libcalls do not need to pass items in both FP and general
+ registers. */
+ && type != NULL_TREE
+ /* All this hair applies to "outgoing" args only. This includes
+ sibcall arguments setup with FUNCTION_INCOMING_ARG. */
+ && !cum->incoming)
+ /* Also pass outgoing floating arguments in both registers in indirect
+ calls with the 32 bit ABI and the HP assembler since there is no
+ way to specify argument locations in static functions. */
+ || (!TARGET_64BIT
+ && !TARGET_GAS
+ && !cum->incoming
+ && cum->indirect
+ && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ {
+ retval
+ = gen_rtx_PARALLEL
+ (mode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, fpr_reg_base),
+ const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, gpr_reg_base),
+ const0_rtx)));
+ }
+ else
+ {
+ /* See if we should pass this parameter in a general register. */
+ if (TARGET_SOFT_FLOAT
+ /* Indirect calls in the normal 32bit ABI require all arguments
+ to be passed in general registers. */
+ || (!TARGET_PORTABLE_RUNTIME
+ && !TARGET_64BIT
+ && !TARGET_ELF32
+ && cum->indirect)
+ /* If the parameter is not a scalar floating-point parameter,
+ then it belongs in GPRs. */
+ || GET_MODE_CLASS (mode) != MODE_FLOAT
+ /* Structure with single SFmode field belongs in GPR. */
+ || (type && AGGREGATE_TYPE_P (type)))
+ retval = gen_rtx_REG (mode, gpr_reg_base);
+ else
+ retval = gen_rtx_REG (mode, fpr_reg_base);
+ }
+ return retval;
+}
+
+/* Arguments larger than one word are double word aligned. */
+
+static unsigned int
+pa_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ bool singleword = (type
+ ? (integer_zerop (TYPE_SIZE (type))
+ || !TREE_CONSTANT (TYPE_SIZE (type))
+ || int_size_in_bytes (type) <= UNITS_PER_WORD)
+ : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
+
+ return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
+}
+
+/* If this arg would be passed totally in registers or totally on the stack,
+ then this routine should return zero. */
+
+static int
+pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ unsigned int max_arg_words = 8;
+ unsigned int offset = 0;
+
+ if (!TARGET_64BIT)
+ return 0;
+
+ if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
+ offset = 1;
+
+ if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
+ /* Arg fits fully into registers. */
+ return 0;
+ else if (cum->words + offset >= max_arg_words)
+ /* Arg fully on the stack. */
+ return 0;
+ else
+ /* Arg is split. */
+ return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
+}
+
+
+/* A get_unnamed_section callback for switching to the text section.
+
+ This function is only used with SOM. Because we don't support
+ named subspaces, we can only create a new subspace or switch back
+ to the default text subspace.
*/ + +static void +som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED) +{ + gcc_assert (TARGET_SOM); + if (TARGET_GAS) + { + if (cfun && cfun->machine && !cfun->machine->in_nsubspa) + { + /* We only want to emit a .nsubspa directive once at the + start of the function. */ + cfun->machine->in_nsubspa = 1; + + /* Create a new subspace for the text. This provides + better stub placement and one-only functions. */ + if (cfun->decl + && DECL_ONE_ONLY (cfun->decl) + && !DECL_WEAK (cfun->decl)) + { + output_section_asm_op ("\t.SPACE $TEXT$\n" + "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8," + "ACCESS=44,SORT=24,COMDAT"); + return; + } + } + else + { + /* There isn't a current function or the body of the current + function has been completed. So, we are changing to the + text section to output debugging information. Thus, we + need to forget that we are in the text section so that + varasm.c will call us when text_section is selected again. */ + gcc_assert (!cfun || !cfun->machine + || cfun->machine->in_nsubspa == 2); + in_section = NULL; + } + output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$"); + return; + } + output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$"); +} + +/* A get_unnamed_section callback for switching to comdat data + sections. This function is only used with SOM. */ + +static void +som_output_comdat_data_section_asm_op (const void *data) +{ + in_section = NULL; + output_section_asm_op (data); +} + +/* Implement TARGET_ASM_INITIALIZE_SECTIONS */ + +static void +pa_som_asm_init_sections (void) +{ + text_section + = get_unnamed_section (0, som_output_text_section_asm_op, NULL); + + /* SOM puts readonly data in the default $LIT$ subspace when PIC code + is not being generated. */ + som_readonly_data_section + = get_unnamed_section (0, output_section_asm_op, + "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$"); + + /* When secondary definitions are not supported, SOM makes readonly + data one-only by creating a new $LIT$ subspace in $TEXT$ with + the comdat flag. */ + som_one_only_readonly_data_section + = get_unnamed_section (0, som_output_comdat_data_section_asm_op, + "\t.SPACE $TEXT$\n" + "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8," + "ACCESS=0x2c,SORT=16,COMDAT"); + + + /* When secondary definitions are not supported, SOM makes data one-only + by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */ + som_one_only_data_section + = get_unnamed_section (SECTION_WRITE, + som_output_comdat_data_section_asm_op, + "\t.SPACE $PRIVATE$\n" + "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8," + "ACCESS=31,SORT=24,COMDAT"); + + /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups + which reference data within the $TEXT$ space (for example constant + strings in the $LIT$ subspace). + + The assemblers (GAS and HP as) both have problems with handling + the difference of two symbols which is the other correct way to + reference constant data during PIC code generation. + + So, there's no way to reference constant data which is in the + $TEXT$ space during PIC generation. Instead place all constant + data into the $PRIVATE$ subspace (this reduces sharing, but it + works correctly). */ + readonly_data_section = flag_pic ? data_section : som_readonly_data_section; + + /* We must not have a reference to an external symbol defined in a + shared library in a readonly section, else the SOM linker will + complain. + + So, we force exception information into the data section. 
*/ + exception_section = data_section; +} + +/* On hpux10, the linker will give an error if we have a reference + in the read-only data section to a symbol defined in a shared + library. Therefore, expressions that might require a reloc can + not be placed in the read-only data section. */ + +static section * +pa_select_section (tree exp, int reloc, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + if (TREE_CODE (exp) == VAR_DECL + && TREE_READONLY (exp) + && !TREE_THIS_VOLATILE (exp) + && DECL_INITIAL (exp) + && (DECL_INITIAL (exp) == error_mark_node + || TREE_CONSTANT (DECL_INITIAL (exp))) + && !reloc) + { + if (TARGET_SOM + && DECL_ONE_ONLY (exp) + && !DECL_WEAK (exp)) + return som_one_only_readonly_data_section; + else + return readonly_data_section; + } + else if (CONSTANT_CLASS_P (exp) && !reloc) + return readonly_data_section; + else if (TARGET_SOM + && TREE_CODE (exp) == VAR_DECL + && DECL_ONE_ONLY (exp) + && !DECL_WEAK (exp)) + return som_one_only_data_section; + else + return data_section; +} + +static void +pa_globalize_label (FILE *stream, const char *name) +{ + /* We only handle DATA objects here, functions are globalized in + ASM_DECLARE_FUNCTION_NAME. */ + if (! FUNCTION_NAME_P (name)) + { + fputs ("\t.EXPORT ", stream); + assemble_name (stream, name); + fputs (",DATA\n", stream); + } +} + +/* Worker function for TARGET_STRUCT_VALUE_RTX. */ + +static rtx +pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM); +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +bool +pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + /* SOM ABI says that objects larger than 64 bits are returned in memory. + PA64 ABI says that objects larger than 128 bits are returned in memory. + Note, int_size_in_bytes can return -1 if the size of the object is + variable or larger than the maximum value that can be expressed as + a HOST_WIDE_INT. It can also return zero for an empty type. The + simplest way to handle variable and empty types is to pass them in + memory. This avoids problems in defining the boundaries of argument + slots, allocating registers, etc. */ + return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8) + || int_size_in_bytes (type) <= 0); +} + +/* Structure to hold declaration and name of external symbols that are + emitted by GCC. We generate a vector of these symbols and output them + at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true. + This avoids putting out names that are never really used. */ + +typedef struct GTY(()) extern_symbol +{ + tree decl; + const char *name; +} extern_symbol; + +/* Define gc'd vector type for extern_symbol. */ +DEF_VEC_O(extern_symbol); +DEF_VEC_ALLOC_O(extern_symbol,gc); + +/* Vector of extern_symbol pointers. */ +static GTY(()) VEC(extern_symbol,gc) *extern_symbols; + +#ifdef ASM_OUTPUT_EXTERNAL_REAL +/* Mark DECL (name NAME) as an external reference (assembler output + file FILE). This saves the names to output at the end of the file + if actually referenced. */ + +void +pa_hpux_asm_output_external (FILE *file, tree decl, const char *name) +{ + extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL); + + gcc_assert (file == asm_out_file); + p->decl = decl; + p->name = name; +} + +/* Output text required at the end of an assembler file. + This includes deferred plabels and .import directives for + all external symbols that were actually referenced. 
+
+/* Return true if a change from mode FROM to mode TO for a register
+   in register class RCLASS is invalid.  */
+
+bool
+pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+                             enum reg_class rclass)
+{
+  if (from == to)
+    return false;
+
+  /* Reject changes to/from complex and vector modes.  */
+  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
+      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
+    return true;
+
+  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
+    return false;
+
+  /* There is no way to load QImode or HImode values directly from
+     memory.  SImode loads to the FP registers are not zero extended.
+     On the 64-bit target, this conflicts with the definition of
+     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
+     with different sizes in the floating-point registers.  */
+  if (MAYBE_FP_REG_CLASS_P (rclass))
+    return true;
+
+  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
+     in specific sets of registers.  Thus, we cannot allow changing
+     to a larger mode when it's larger than a word.  */
+  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
+      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
+    return true;
+
+  return false;
+}
+
+/* Returns TRUE if it is a good idea to tie two pseudo registers
+   when one has mode MODE1 and one has mode MODE2.
+   If HARD_REGNO_MODE_OK could produce different values for MODE1
+   and MODE2 for any hard reg, then this must be FALSE for correct
+   output.
+
+   We should return FALSE for QImode and HImode because these modes
+   are not ok in the floating-point registers.  However, this prevents
+   tying these modes to SImode and DImode in the general registers.
+   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
+   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
+   in the floating-point registers.  */
+
+bool
+pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+  /* Don't tie modes in different classes.  */
+  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
+    return false;
+
+  return true;
+}
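The size rule in pa_cannot_change_mode_class can be restated as a tiny standalone function. This is a sketch for illustration only; it ignores the complex/vector check, and mode_change_invalid, the byte sizes, and main are all hypothetical stand-ins for the GCC-internal machinery:

#include <assert.h>
#include <stdbool.h>

/* Mirror of the size logic above: FROM_SIZE/TO_SIZE are mode sizes in
   bytes, IN_FP_CLASS models MAYBE_FP_REG_CLASS_P, WORD is UNITS_PER_WORD.  */
static bool
mode_change_invalid (int from_size, int to_size, bool in_fp_class, int word)
{
  if (from_size == to_size)
    return false;                  /* equal sizes are always allowed      */
  if (in_fp_class)
    return true;                   /* no size-changing subregs in FP regs */
  return to_size > word && to_size > from_size;  /* can't grow past a word */
}

int
main (void)
{
  assert (!mode_change_invalid (4, 4,  false, 4));  /* SImode -> SFmode     */
  assert ( mode_change_invalid (4, 8,  true,  4));  /* SImode -> DImode, FP */
  assert ( mode_change_invalid (4, 16, false, 4));  /* SImode -> TImode     */
  return 0;
}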
+
+/* Length in units of the trampoline instruction code.  */
+
+#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
+
+/* Output assembler code for a block containing the constant parts
+   of a trampoline, leaving space for the variable parts.
+
+   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
+   and then branches to the specified routine.
+
+   This code template is copied from the text segment to a stack location
+   and then patched by pa_trampoline_init to contain valid values, and
+   then entered as a subroutine.
+
+   It is best to keep this as small as possible to avoid having to
+   flush multiple lines in the cache.  */
+
+static void
+pa_asm_trampoline_template (FILE *f)
+{
+  if (!TARGET_64BIT)
+    {
+      fputs ("\tldw 36(%r22),%r21\n", f);
+      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
+      if (ASSEMBLER_DIALECT == 0)
+        fputs ("\tdepi 0,31,2,%r21\n", f);
+      else
+        fputs ("\tdepwi 0,31,2,%r21\n", f);
+      fputs ("\tldw 4(%r21),%r19\n", f);
+      fputs ("\tldw 0(%r21),%r21\n", f);
+      if (TARGET_PA_20)
+        {
+          fputs ("\tbve (%r21)\n", f);
+          fputs ("\tldw 40(%r22),%r29\n", f);
+          fputs ("\t.word 0\n", f);
+          fputs ("\t.word 0\n", f);
+        }
+      else
+        {
+          fputs ("\tldsid (%r21),%r1\n", f);
+          fputs ("\tmtsp %r1,%sr0\n", f);
+          fputs ("\tbe 0(%sr0,%r21)\n", f);
+          fputs ("\tldw 40(%r22),%r29\n", f);
+        }
+      fputs ("\t.word 0\n", f);
+      fputs ("\t.word 0\n", f);
+      fputs ("\t.word 0\n", f);
+      fputs ("\t.word 0\n", f);
+    }
+  else
+    {
+      fputs ("\t.dword 0\n", f);
+      fputs ("\t.dword 0\n", f);
+      fputs ("\t.dword 0\n", f);
+      fputs ("\t.dword 0\n", f);
+      fputs ("\tmfia %r31\n", f);
+      fputs ("\tldd 24(%r31),%r1\n", f);
+      fputs ("\tldd 24(%r1),%r27\n", f);
+      fputs ("\tldd 16(%r1),%r1\n", f);
+      fputs ("\tbve (%r1)\n", f);
+      fputs ("\tldd 32(%r31),%r31\n", f);
+      fputs ("\t.dword 0 ; fptr\n", f);
+      fputs ("\t.dword 0 ; static link\n", f);
+    }
+}
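Both 32-bit variants of the template above emit nine instruction/padding words followed by four data words, so the block it reserves can be pictured as the struct below. This layout sketch is editorial: pa32_trampoline is a hypothetical name, not a type GCC defines, and the offsets are taken from the ldw 36(%r22) and ldw 40(%r22) instructions above and the adjust_address calls in pa_trampoline_init that follows:

/* Hypothetical picture of the 32-bit trampoline block.  */
struct pa32_trampoline
{
  unsigned int code[9];    /* offsets  0..35: instructions and padding      */
  unsigned int fnaddr;     /* offset 36: target function address            */
  unsigned int chain;      /* offset 40: static chain value                 */
  unsigned int plabel[2];  /* offsets 44/48: trampoline address and %r19,
                              together forming the plabel that callers use  */
};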
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+   FNADDR is an RTX for the address of the function's pure code.
+   CXT is an RTX for the static chain value for the function.
+
+   Move the function address to the trampoline template at offset 36.
+   Move the static chain value to the trampoline template at offset 40.
+   Move the trampoline address to the trampoline template at offset 44.
+   Move r19 to the trampoline template at offset 48.  The latter two
+   words create a plabel for the indirect call to the trampoline.
+
+   A similar sequence is used for the 64-bit port but the plabel is
+   at the beginning of the trampoline.
+
+   Finally, the cache entries for the trampoline code are flushed.
+   This is necessary to ensure that the trampoline instruction sequence
+   is written to memory prior to any attempts at prefetching the code
+   sequence.  */
+
+static void
+pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+  rtx start_addr = gen_reg_rtx (Pmode);
+  rtx end_addr = gen_reg_rtx (Pmode);
+  rtx line_length = gen_reg_rtx (Pmode);
+  rtx r_tramp, tmp;
+
+  emit_block_move (m_tramp, assemble_trampoline_template (),
+                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
+
+  if (!TARGET_64BIT)
+    {
+      tmp = adjust_address (m_tramp, Pmode, 36);
+      emit_move_insn (tmp, fnaddr);
+      tmp = adjust_address (m_tramp, Pmode, 40);
+      emit_move_insn (tmp, chain_value);
+
+      /* Create a fat pointer for the trampoline.  */
+      tmp = adjust_address (m_tramp, Pmode, 44);
+      emit_move_insn (tmp, r_tramp);
+      tmp = adjust_address (m_tramp, Pmode, 48);
+      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
+
+      /* fdc and fic only use registers for the address to flush;
+         they do not accept integer displacements.  We align the
+         start and end addresses to the beginning of their respective
+         cache lines to minimize the number of lines flushed.  */
+      emit_insn (gen_andsi3 (start_addr, r_tramp,
+                             GEN_INT (-MIN_CACHELINE_SIZE)));
+      tmp = force_reg (Pmode, plus_constant (r_tramp,
+                                             TRAMPOLINE_CODE_SIZE - 1));
+      emit_insn (gen_andsi3 (end_addr, tmp,
+                             GEN_INT (-MIN_CACHELINE_SIZE)));
+      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
+      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
+      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
+                                    gen_reg_rtx (Pmode),
+                                    gen_reg_rtx (Pmode)));
+    }
+  else
+    {
+      tmp = adjust_address (m_tramp, Pmode, 56);
+      emit_move_insn (tmp, fnaddr);
+      tmp = adjust_address (m_tramp, Pmode, 64);
+      emit_move_insn (tmp, chain_value);
+
+      /* Create a fat pointer for the trampoline.  */
+      tmp = adjust_address (m_tramp, Pmode, 16);
+      emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
+      tmp = adjust_address (m_tramp, Pmode, 24);
+      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
+
+      /* fdc and fic only use registers for the address to flush;
+         they do not accept integer displacements.  We align the
+         start and end addresses to the beginning of their respective
+         cache lines to minimize the number of lines flushed.  */
+      tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
+      emit_insn (gen_anddi3 (start_addr, tmp,
+                             GEN_INT (-MIN_CACHELINE_SIZE)));
+      tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
+      emit_insn (gen_anddi3 (end_addr, tmp,
+                             GEN_INT (-MIN_CACHELINE_SIZE)));
+      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
+      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
+      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
+                                    gen_reg_rtx (Pmode),
+                                    gen_reg_rtx (Pmode)));
+    }
+}
+
+/* Perform any machine-specific adjustment in the address of the trampoline.
+   ADDR contains the address that was passed to pa_trampoline_init.
+   Adjust the trampoline address to point to the plabel at offset 44.  */
+
+static rtx
+pa_trampoline_adjust_address (rtx addr)
+{
+  if (!TARGET_64BIT)
+    addr = memory_address (Pmode, plus_constant (addr, 46));
+  return addr;
+}
+
+static rtx
+pa_delegitimize_address (rtx orig_x)
+{
+  rtx x = delegitimize_mem_from_attrs (orig_x);
+
+  if (GET_CODE (x) == LO_SUM
+      && GET_CODE (XEXP (x, 1)) == UNSPEC
+      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
+    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
+  return x;
+}
+
+static rtx
+pa_internal_arg_pointer (void)
+{
+  /* The argument pointer and the hard frame pointer are the same in
+     the 32-bit runtime, so we don't need a copy.  */
+  if (TARGET_64BIT)
+    return copy_to_reg (virtual_incoming_args_rtx);
+  else
+    return virtual_incoming_args_rtx;
+}
+
+/* Given FROM and TO register numbers, say whether this elimination is
+   allowed.  Frame pointer elimination is automatically handled.  */
+
+static bool
+pa_can_eliminate (const int from, const int to)
+{
+  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
+  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
+    return false;
+
+  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+          ? ! frame_pointer_needed
+          : true);
+}
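These hooks are exercised whenever the address of a GNU C nested function escapes. In the usage sketch below (sort_with_bias and cmp are hypothetical names), GCC builds the trampoline on the stack at run time, and the pointer handed to qsort is the plabel at offset 44; pa_trampoline_adjust_address adds 46, i.e. 44 plus 2, where the extra 2 appears to mark the pointer as a plabel, matching the "bb,>=,n %r21,30" test and the depi/depwi that clears the low two bits in the template above:

#include <stdlib.h>

void
sort_with_bias (int *a, size_t n, int bias)
{
  /* Nested function: it uses BIAS from the enclosing frame, so calling
     it through a pointer needs the static chain -- hence a trampoline.  */
  int cmp (const void *x, const void *y)
  {
    return (*(const int *) x + bias) - (*(const int *) y + bias);
  }

  qsort (a, n, sizeof *a, cmp);
}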
+
+/* Define the offset between two registers, FROM to be eliminated and
+   its replacement TO, at the start of a routine.  */
+
+HOST_WIDE_INT
+pa_initial_elimination_offset (int from, int to)
+{
+  HOST_WIDE_INT offset;
+
+  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
+      && to == STACK_POINTER_REGNUM)
+    offset = -compute_frame_size (get_frame_size (), 0);
+  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+    offset = 0;
+  else
+    gcc_unreachable ();
+
+  return offset;
+}
+
+static void
+pa_conditional_register_usage (void)
+{
+  int i;
+
+  if (!TARGET_64BIT && !TARGET_PA_11)
+    {
+      for (i = 56; i <= FP_REG_LAST; i++)
+        fixed_regs[i] = call_used_regs[i] = 1;
+      for (i = 33; i < 56; i += 2)
+        fixed_regs[i] = call_used_regs[i] = 1;
+    }
+  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
+    {
+      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
+        fixed_regs[i] = call_used_regs[i] = 1;
+    }
+  if (flag_pic)
+    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+}
+
+/* Target hook for c_mode_for_suffix.  */
+
+static enum machine_mode
+pa_c_mode_for_suffix (char suffix)
+{
+  if (HPUX_LONG_DOUBLE_LIBRARY)
+    {
+      if (suffix == 'q')
+        return TFmode;
+    }
+
+  return VOIDmode;
+}
+
+/* Target hook for function_section.  */
+
+static section *
+pa_function_section (tree decl, enum node_frequency freq,
+                     bool startup, bool exit)
+{
+  /* Put functions in the text section if the target doesn't have
+     named sections.  */
+  if (!targetm.have_named_sections)
+    return text_section;
+
+  /* Force nested functions into the same section as the containing
+     function.  */
+  if (decl
+      && DECL_SECTION_NAME (decl) == NULL_TREE
+      && DECL_CONTEXT (decl) != NULL_TREE
+      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
+      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
+    return function_section (DECL_CONTEXT (decl));
+
+  /* Otherwise, use the default function section.  */
+  return default_function_section (decl, freq, startup, exit);
+}
+
+/* Implement TARGET_SECTION_TYPE_FLAGS.  */
+
+static unsigned int
+pa_section_type_flags (tree decl, const char *name, int reloc)
+{
+  unsigned int flags;
+
+  flags = default_section_type_flags (decl, name, reloc);
+
+  /* Function labels are placed in the constant pool.  This can
+     cause a section conflict if decls are put in ".data.rel.ro"
+     or ".data.rel.ro.local" using the __attribute__ construct.  */
+  if (strcmp (name, ".data.rel.ro") == 0
+      || strcmp (name, ".data.rel.ro.local") == 0)
+    flags |= SECTION_WRITE | SECTION_RELRO;
+
+  return flags;
+}
+
+#include "gt-pa.h"
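Two short usage sketches for the last hooks above; both are hypothetical examples, not GCC code. With an HP-UX long-double library, pa_c_mode_for_suffix maps the 'q' literal suffix to TFmode, the 128-bit float format; and pa_section_type_flags lets a table of function pointers be placed in ".data.rel.ro" by hand without a section-type conflict, since function labels go through the constant pool:

/* Assumes an HP-UX target where HPUX_LONG_DOUBLE_LIBRARY holds, so the
   'q' suffix yields a TFmode (128-bit) constant of long double type.  */
long double pi_q = 3.14159265358979323846264338327950288q;

/* The hook forces SECTION_WRITE | SECTION_RELRO on this section, so the
   function-label relocation below does not conflict with its flags.  */
extern void handler (void);
void (*dispatch_table[1]) (void)
  __attribute__ ((section (".data.rel.ro"))) = { handler };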