Diffstat (limited to 'gcc/config/pa')
47 files changed, 28861 insertions, 0 deletions
diff --git a/gcc/config/pa/constraints.md b/gcc/config/pa/constraints.md new file mode 100644 index 000000000..c1f3d5cd3 --- /dev/null +++ b/gcc/config/pa/constraints.md @@ -0,0 +1,140 @@ +;; Constraint definitions for pa +;; Copyright (C) 2007 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;;; Unused letters: +;;; ABCDEF H V Y +;;; bcde ghijklmnop stuvw z + +;; Register constraints. +(define_register_constraint "a" "R1_REGS" + "General register 1.") + +(define_register_constraint "f" "FP_REGS" + "Floating-point register.") + +(define_register_constraint "q" "SHIFT_REGS" + "Shift amount register.") + +;; Keep 'x' for backward compatibility with user asm. +(define_register_constraint "x" "FP_REGS" + "Floating-point register.") + +(define_register_constraint "y" "TARGET_64BIT ? FP_REGS : FPUPPER_REGS" + "Upper floating-point register.") + +(define_register_constraint "Z" "ALL_REGS" + "Any register.") + +;; Integer constant constraints. +(define_constraint "I" + "Signed 11-bit integer constant." + (and (match_code "const_int") + (match_test "VAL_11_BITS_P (ival)"))) + +(define_constraint "J" + "Signed 14-bit integer constant." + (and (match_code "const_int") + (match_test "VAL_14_BITS_P (ival)"))) + +(define_constraint "K" + "Integer constant that can be deposited with a zdepi instruction." + (and (match_code "const_int") + (match_test "zdepi_cint_p (ival)"))) + +(define_constraint "L" + "Signed 5-bit integer constant." + (and (match_code "const_int") + (match_test "VAL_5_BITS_P (ival)"))) + +(define_constraint "M" + "Integer constant 0." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "N" + "Integer constant that can be loaded with a ldil instruction." + (and (match_code "const_int") + (match_test "ldil_cint_p (ival)"))) + +(define_constraint "O" + "Integer constant such that ival+1 is a power of 2." + (and (match_code "const_int") + (match_test "(ival & (ival + 1)) == 0"))) + +(define_constraint "P" + "Integer constant that can be used as an and mask in depi and + extru instructions." + (and (match_code "const_int") + (match_test "and_mask_p (ival)"))) + +(define_constraint "S" + "Integer constant 31." + (and (match_code "const_int") + (match_test "ival == 31"))) + +(define_constraint "U" + "Integer constant 63." + (and (match_code "const_int") + (match_test "ival == 63"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "Floating-point constant 0." + (and (match_code "const_double") + (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT + && op == CONST0_RTX (mode)"))) + +;; Extra constraints. +(define_constraint "A" + "A LO_SUM DLT memory operand." + (and (match_code "mem") + (match_test "IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))"))) + +(define_constraint "Q" + "A memory operand that can be used as the destination operand of an + integer store, or the source operand of an integer load. 
That is + any memory operand that isn't a symbolic, indexed or lo_sum memory + operand. Note that an unassigned pseudo register is such a memory + operand. We accept unassigned pseudo registers because reload + generates them and then doesn't re-recognize the insn, causing + constrain_operands to fail." + (match_test "integer_store_memory_operand (op, mode)")) + +(define_constraint "R" + "A scaled or unscaled indexed memory operand that can be used as the + source address in integer and floating-point loads." + (and (match_code "mem") + (match_test "IS_INDEX_ADDR_P (XEXP (op, 0))"))) + +(define_constraint "T" + "A memory operand for floating-point loads and stores." + (and (match_code "mem") + (match_test "!IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0)) + && !IS_INDEX_ADDR_P (XEXP (op, 0)) + && memory_address_p ((GET_MODE_SIZE (mode) == 4 + ? SFmode : DFmode), + XEXP (op, 0))"))) + +;; We could allow short displacements but TARGET_LEGITIMATE_ADDRESS_P +;; can't tell when a long displacement is valid. +(define_constraint "W" + "A register indirect memory operand." + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0)) + && REG_OK_FOR_BASE_P (XEXP (op, 0))"))) diff --git a/gcc/config/pa/elf.h b/gcc/config/pa/elf.h new file mode 100644 index 000000000..1028206fd --- /dev/null +++ b/gcc/config/pa/elf.h @@ -0,0 +1,92 @@ +/* Definitions for ELF assembler support. + Copyright (C) 1999, 2003, 2005, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* So we can conditionalize small amounts of code in pa.c or pa.md. */ +#define OBJ_ELF + +#define ENDFILE_SPEC "crtend.o%s" + +#define STARTFILE_SPEC "%{!shared: \ + %{!symbolic: \ + %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\ + crtbegin.o%s" + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + +#define TARGET_ASM_FILE_START pa_elf_file_start + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ +do { \ + if (TREE_PUBLIC (DECL)) \ + { \ + fputs ("\t.EXPORT ", FILE); \ + assemble_name (FILE, NAME); \ + fputs (",ENTRY\n", FILE); \ + } \ + } while (0) + +/* This is how to output a command to make the user-level label + named NAME defined for reference from other files. We use + assemble_name_raw instead of assemble_name since a symbol in + a .IMPORT directive that isn't otherwise referenced is not + placed in the symbol table of the assembled object. + + Failure to import a function reference can cause the HP linker + to segmentation fault! + + Note that the SOM based tools need the symbol imported as a + CODE symbol, while the ELF based tools require the symbol to + be imported as an ENTRY symbol. 
*/ + +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + pa_hpux_asm_output_external ((FILE), (DECL), (NAME)) +#define ASM_OUTPUT_EXTERNAL_REAL(FILE, DECL, NAME) \ + do { fputs ("\t.IMPORT ", FILE); \ + assemble_name_raw (FILE, NAME); \ + if (FUNCTION_NAME_P (NAME)) \ + fputs (",ENTRY\n", FILE); \ + else \ + fputs (",DATA\n", FILE); \ + } while (0) + +/* The bogus HP assembler requires ALL external references to be + "imported", even library calls. They look a bit different, so + here's this macro. + + Also note not all libcall names are passed to + targetm.encode_section_info (__main for example). To make sure all + libcall names have section info recorded in them, we do it here. */ + +#undef ASM_OUTPUT_EXTERNAL_LIBCALL +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, RTL) \ + do { fputs ("\t.IMPORT ", FILE); \ + if (!function_label_operand (RTL, VOIDmode)) \ + hppa_encode_label (RTL); \ + assemble_name (FILE, XSTR ((RTL), 0)); \ + fputs (",ENTRY\n", FILE); \ + } while (0) + +/* Biggest alignment supported by the object file format of this + machine. Use this macro to limit the alignment which can be + specified using the `__attribute__ ((aligned (N)))' construct. If + not defined, the default value is `BIGGEST_ALIGNMENT'. */ +#define MAX_OFILE_ALIGNMENT (32768 * 8) diff --git a/gcc/config/pa/fptr.c b/gcc/config/pa/fptr.c new file mode 100644 index 000000000..320d18267 --- /dev/null +++ b/gcc/config/pa/fptr.c @@ -0,0 +1,131 @@ +/* Subroutine for function pointer canonicalization on PA-RISC with ELF32. + Copyright 2002, 2003, 2004, 2007, 2009 Free Software Foundation, Inc. + Contributed by John David Anglin (dave.anglin@nrc.ca). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* WARNING: The code is this function depends on internal and undocumented + details of the GNU linker and dynamic loader as implemented for parisc + linux. */ + +/* This MUST match the defines sysdeps/hppa/dl-machine.h and + bfd/elf32-hppa.c. */ +#define GOT_FROM_PLT_STUB (4*4) + +/* List of byte offsets in _dl_runtime_resolve to search for "bl" branches. + The first "bl" branch instruction found MUST be a call to fixup. See + the define for TRAMPOLINE_TEMPLATE in sysdeps/hppa/dl-machine.h. If + the trampoline template is changed, the list must be appropriately + updated. The offset of -4 allows for a magic branch at the start of + the template should it be necessary to change the current branch + position. */ +#define NOFFSETS 2 +static int fixup_branch_offset[NOFFSETS] = { 32, -4 }; + +#define GET_FIELD(X, FROM, TO) \ + ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1)) +#define SIGN_EXTEND(VAL,BITS) \ + ((int) ((VAL) >> ((BITS) - 1) ? 
(-1 << (BITS)) | (VAL) : (VAL))) + +struct link_map; +typedef int (*fptr_t) (void); +typedef int (*fixup_t) (struct link_map *, unsigned int); +extern unsigned int _GLOBAL_OFFSET_TABLE_; + +/* __canonicalize_funcptr_for_compare must be hidden so that it is not + placed in the dynamic symbol table. Like millicode functions, it + must be linked into all binaries in order access the got table of + that binary. However, we don't use the millicode calling convention + and the routine must be a normal function so that it can be compiled + as pic code. */ +unsigned int __canonicalize_funcptr_for_compare (fptr_t) + __attribute__ ((visibility ("hidden"))); + +unsigned int +__canonicalize_funcptr_for_compare (fptr_t fptr) +{ + static unsigned int fixup_plabel[2]; + static fixup_t fixup; + unsigned int *plabel, *got; + + /* -1 and page 0 are special. -1 is used in crtend to mark the end of + a list of function pointers. Also return immediately if the plabel + bit is not set in the function pointer. In this case, the function + pointer points directly to the function. */ + if ((int) fptr == -1 || (unsigned int) fptr < 4096 || !((int) fptr & 2)) + return (unsigned int) fptr; + + /* The function pointer points to a function descriptor (plabel). If + the plabel hasn't been resolved, the first word of the plabel points + to the entry of the PLT stub just before the global offset table. + The second word in the plabel contains the relocation offset for the + function. */ + plabel = (unsigned int *) ((unsigned int) fptr & ~3); + got = (unsigned int *) (plabel[0] + GOT_FROM_PLT_STUB); + + /* Return the address of the function if the plabel has been resolved. */ + if (got != &_GLOBAL_OFFSET_TABLE_) + return plabel[0]; + + /* Initialize our plabel for calling fixup if we haven't done so already. + This code needs to be thread safe but we don't have to be too careful + as the result is invariant. */ + if (!fixup) + { + int i; + unsigned int *iptr; + + /* Find the first "bl" branch in the offset search list. This is a + call to fixup or a magic branch to fixup at the beginning of the + trampoline template. The fixup function does the actual runtime + resolution of function descriptors. We only look for "bl" branches + with a 17-bit pc-relative displacement. */ + for (i = 0; i < NOFFSETS; i++) + { + iptr = (unsigned int *) (got[-2] + fixup_branch_offset[i]); + if ((*iptr & 0xfc00e000) == 0xe8000000) + break; + } + + /* This should not happen... */ + if (i == NOFFSETS) + return ~0; + + /* Extract the 17-bit displacement from the instruction. */ + iptr += SIGN_EXTEND (GET_FIELD (*iptr, 19, 28) | + GET_FIELD (*iptr, 29, 29) << 10 | + GET_FIELD (*iptr, 11, 15) << 11 | + GET_FIELD (*iptr, 31, 31) << 16, 17); + + /* Build a plabel for an indirect call to fixup. */ + fixup_plabel[0] = (unsigned int) iptr + 8; /* address of fixup */ + fixup_plabel[1] = got[-1]; /* ltp for fixup */ + fixup = (fixup_t) ((int) fixup_plabel | 3); + } + + /* Call fixup to resolve the function address. got[1] contains the + link_map pointer and plabel[1] the relocation offset. */ + fixup ((struct link_map *) got[1], plabel[1]); + + return plabel[0]; +} diff --git a/gcc/config/pa/hpux-unwind.h b/gcc/config/pa/hpux-unwind.h new file mode 100644 index 000000000..92061ec36 --- /dev/null +++ b/gcc/config/pa/hpux-unwind.h @@ -0,0 +1,361 @@ +/* DWARF2 EH unwinding support for PA HP-UX. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +/* Don't use this if inhibit_libc is set. + The build for this target will fail trying to include missing headers. */ +#ifndef inhibit_libc +#include <signal.h> +#include <sys/ucontext.h> +#include <unistd.h> + +/* FIXME: We currently ignore the high halves of general, space and + control registers on PA 2.0 machines for applications using the + 32-bit runtime. We don't restore space registers or the floating + point status registers. */ + +#define MD_FALLBACK_FRAME_STATE_FOR pa_fallback_frame_state + +/* HP-UX 10.X doesn't define GetSSReg. */ +#ifndef GetSSReg +#define GetSSReg(ssp, ss_reg) \ + ((UseWideRegs (ssp)) \ + ? (ssp)->ss_wide.ss_32.ss_reg ## _lo \ + : (ssp)->ss_narrow.ss_reg) +#endif + +#if TARGET_64BIT +#define GetSSRegAddr(ssp, ss_reg) ((long) &((ssp)->ss_wide.ss_64.ss_reg)) +#else +#define GetSSRegAddr(ssp, ss_reg) \ + ((UseWideRegs (ssp)) \ + ? (long) &((ssp)->ss_wide.ss_32.ss_reg ## _lo) \ + : (long) &((ssp)->ss_narrow.ss_reg)) +#endif + +#define UPDATE_FS_FOR_SAR(FS, N) \ + (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \ + (FS)->regs.reg[N].loc.offset = GetSSRegAddr (mc, ss_cr11) - new_cfa + +#define UPDATE_FS_FOR_GR(FS, GRN, N) \ + (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \ + (FS)->regs.reg[N].loc.offset = GetSSRegAddr (mc, ss_gr##GRN) - new_cfa + +#define UPDATE_FS_FOR_FR(FS, FRN, N) \ + (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \ + (FS)->regs.reg[N].loc.offset = (long) &(mc->ss_fr##FRN) - new_cfa; + +#define UPDATE_FS_FOR_PC(FS, N) \ + (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \ + (FS)->regs.reg[N].loc.offset = GetSSRegAddr (mc, ss_pcoq_head) - new_cfa + +/* Extract bit field from word using HP's numbering (MSB = 0). */ +#define GET_FIELD(X, FROM, TO) \ + ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1)) + +static inline int +sign_extend (int x, int len) +{ + int signbit = (1 << (len - 1)); + int mask = (signbit << 1) - 1; + return ((x & mask) ^ signbit) - signbit; +} + +/* Extract a 17-bit signed constant from branch instructions. */ +static inline int +extract_17 (unsigned word) +{ + return sign_extend (GET_FIELD (word, 19, 28) + | GET_FIELD (word, 29, 29) << 10 + | GET_FIELD (word, 11, 15) << 11 + | (word & 0x1) << 16, 17); +} + +/* Extract a 22-bit signed constant from branch instructions. 
*/ +static inline int +extract_22 (unsigned word) +{ + return sign_extend (GET_FIELD (word, 19, 28) + | GET_FIELD (word, 29, 29) << 10 + | GET_FIELD (word, 11, 15) << 11 + | GET_FIELD (word, 6, 10) << 16 + | (word & 0x1) << 21, 22); +} + +static _Unwind_Reason_Code +pa_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + static long cpu; + unsigned int *pc = (unsigned int *) context->ra; + + if (pc == 0) + return _URC_END_OF_STACK; + + /* Check for relocation of the return value. */ + if (!TARGET_64BIT + && *(pc + 0) == 0x2fd01224 /* fstd,ma fr4,8(sp) */ + && *(pc + 1) == 0x0fd9109d /* ldw -4(sp),ret1 */ + && *(pc + 2) == 0x0fd130bc) /* ldw,mb -8(sp),ret0 */ + pc += 3; + else if (!TARGET_64BIT + && *(pc + 0) == 0x27d01224 /* fstw,ma fr4,8(sp) */ + && *(pc + 1) == 0x0fd130bc) /* ldw,mb -8(sp),ret0 */ + pc += 2; + else if (!TARGET_64BIT + && *(pc + 0) == 0x0fdc12b0 /* stw,ma ret0,8(sp) */ + && *(pc + 1) == 0x0fdd1299 /* stw ret1,-4(sp) */ + && *(pc + 2) == 0x2fd13024) /* fldd,mb -8(sp),fr4 */ + pc += 3; + else if (!TARGET_64BIT + && *(pc + 0) == 0x0fdc12b0 /* stw,ma ret0,8(sp) */ + && *(pc + 1) == 0x27d13024) /* fldw,mb -8(sp),fr4 */ + pc += 2; + + /* Check if the return address points to an export stub (PA 1.1 or 2.0). */ + if ((!TARGET_64BIT + && *(pc + 0) == 0x4bc23fd1 /* ldw -18(sp),rp */ + && *(pc + 1) == 0x004010a1 /* ldsid (rp),r1 */ + && *(pc + 2) == 0x00011820 /* mtsp r1,sr0 */ + && *(pc + 3) == 0xe0400002) /* be,n 0(sr0,rp) */ + || + (!TARGET_64BIT + && *(pc + 0) == 0x4bc23fd1 /* ldw -18(sp),rp */ + && *(pc + 1) == 0xe840d002)) /* bve,n (rp) */ + { + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = 0; + + fs->retaddr_column = 0; + fs->regs.reg[0].how = REG_SAVED_OFFSET; + fs->regs.reg[0].loc.offset = -24; + + /* Update context to describe the stub frame. */ + uw_update_context (context, fs); + + /* Set up fs to describe the FDE for the caller of this stub. */ + return uw_frame_state_for (context, fs); + } + /* Check if the return address points to a relocation stub. */ + else if (!TARGET_64BIT + && *(pc + 0) == 0x0fd11082 /* ldw -8(sp),rp */ + && (*(pc + 1) == 0xe840c002 /* bv,n r0(rp) */ + || *(pc + 1) == 0xe840d002)) /* bve,n (rp) */ + { + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = 0; + + fs->retaddr_column = 0; + fs->regs.reg[0].how = REG_SAVED_OFFSET; + fs->regs.reg[0].loc.offset = -8; + + /* Update context to describe the stub frame. */ + uw_update_context (context, fs); + + /* Set up fs to describe the FDE for the caller of this stub. */ + return uw_frame_state_for (context, fs); + } + + /* Check if the return address is an export stub as signal handlers + may return via an export stub. */ + if (!TARGET_64BIT + && (*pc & 0xffe0e002) == 0xe8400000 /* bl x,r2 */ + && *(pc + 1) == 0x08000240 /* nop */ + && *(pc + 2) == 0x4bc23fd1 /* ldw -18(sp),rp */ + && *(pc + 3) == 0x004010a1 /* ldsid (rp),r1 */ + && *(pc + 4) == 0x00011820 /* mtsp r1,sr0 */ + && *(pc + 5) == 0xe0400002) /* be,n 0(sr0,rp) */ + /* Extract target address from PA 1.x 17-bit branch. */ + pc += extract_17 (*pc) + 2; + else if (!TARGET_64BIT + && (*pc & 0xfc00e002) == 0xe800a000 /* b,l x,r2 */ + && *(pc + 1) == 0x08000240 /* nop */ + && *(pc + 2) == 0x4bc23fd1 /* ldw -18(sp),rp */ + && *(pc + 3) == 0xe840d002) /* bve,n (rp) */ + /* Extract target address from PA 2.0 22-bit branch. 
*/ + pc += extract_22 (*pc) + 2; + + /* Now check if the return address is one of the signal handler + returns, _sigreturn or _sigsetreturn. */ + if ((TARGET_64BIT + && *(pc + 0) == 0x53db3f51 /* ldd -58(sp),dp */ + && *(pc + 8) == 0x34160116 /* ldi 8b,r22 */ + && *(pc + 9) == 0x08360ac1 /* shladd,l r22,3,r1,r1 */ + && *(pc + 10) == 0x0c2010c1 /* ldd 0(r1),r1 */ + && *(pc + 11) == 0xe4202000) /* be,l 0(sr4,r1) */ + || + (TARGET_64BIT + && *(pc + 0) == 0x36dc0000 /* ldo 0(r22),ret0 */ + && *(pc + 6) == 0x341601c0 /* ldi e0,r22 */ + && *(pc + 7) == 0x08360ac1 /* shladd,l r22,3,r1,r1 */ + && *(pc + 8) == 0x0c2010c1 /* ldd 0(r1),r1 */ + && *(pc + 9) == 0xe4202000) /* be,l 0(sr4,r1) */ + || + (!TARGET_64BIT + && *(pc + 0) == 0x379a0000 /* ldo 0(ret0),r26 */ + && *(pc + 1) == 0x6bd33fc9 /* stw r19,-1c(sp) */ + && *(pc + 2) == 0x20200801 /* ldil L%-40000000,r1 */ + && *(pc + 3) == 0xe420e008 /* be,l 4(sr7,r1) */ + && *(pc + 4) == 0x34160116) /* ldi 8b,r22 */ + || + (!TARGET_64BIT + && *(pc + 0) == 0x6bd33fc9 /* stw r19,-1c(sp) */ + && *(pc + 1) == 0x20200801 /* ldil L%-40000000,r1 */ + && *(pc + 2) == 0xe420e008 /* be,l 4(sr7,r1) */ + && *(pc + 3) == 0x341601c0)) /* ldi e0,r22 */ + { + /* The previous stack pointer is saved at (long *)SP - 1. The + ucontext structure is offset from the start of the previous + frame by the siglocal_misc structure. */ + struct siglocalx *sl = (struct siglocalx *) + (*((long *) context->cfa - 1)); + mcontext_t *mc = &(sl->sl_uc.uc_mcontext); + + long new_cfa = GetSSReg (mc, ss_sp); + + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + + UPDATE_FS_FOR_GR (fs, 1, 1); + UPDATE_FS_FOR_GR (fs, 2, 2); + UPDATE_FS_FOR_GR (fs, 3, 3); + UPDATE_FS_FOR_GR (fs, 4, 4); + UPDATE_FS_FOR_GR (fs, 5, 5); + UPDATE_FS_FOR_GR (fs, 6, 6); + UPDATE_FS_FOR_GR (fs, 7, 7); + UPDATE_FS_FOR_GR (fs, 8, 8); + UPDATE_FS_FOR_GR (fs, 9, 9); + UPDATE_FS_FOR_GR (fs, 10, 10); + UPDATE_FS_FOR_GR (fs, 11, 11); + UPDATE_FS_FOR_GR (fs, 12, 12); + UPDATE_FS_FOR_GR (fs, 13, 13); + UPDATE_FS_FOR_GR (fs, 14, 14); + UPDATE_FS_FOR_GR (fs, 15, 15); + UPDATE_FS_FOR_GR (fs, 16, 16); + UPDATE_FS_FOR_GR (fs, 17, 17); + UPDATE_FS_FOR_GR (fs, 18, 18); + UPDATE_FS_FOR_GR (fs, 19, 19); + UPDATE_FS_FOR_GR (fs, 20, 20); + UPDATE_FS_FOR_GR (fs, 21, 21); + UPDATE_FS_FOR_GR (fs, 22, 22); + UPDATE_FS_FOR_GR (fs, 23, 23); + UPDATE_FS_FOR_GR (fs, 24, 24); + UPDATE_FS_FOR_GR (fs, 25, 25); + UPDATE_FS_FOR_GR (fs, 26, 26); + UPDATE_FS_FOR_GR (fs, 27, 27); + UPDATE_FS_FOR_GR (fs, 28, 28); + UPDATE_FS_FOR_GR (fs, 29, 29); + UPDATE_FS_FOR_GR (fs, 30, 30); + UPDATE_FS_FOR_GR (fs, 31, 31); + + if (TARGET_64BIT) + { + UPDATE_FS_FOR_FR (fs, 4, 32); + UPDATE_FS_FOR_FR (fs, 5, 33); + UPDATE_FS_FOR_FR (fs, 6, 34); + UPDATE_FS_FOR_FR (fs, 7, 35); + UPDATE_FS_FOR_FR (fs, 8, 36); + UPDATE_FS_FOR_FR (fs, 9, 37); + UPDATE_FS_FOR_FR (fs, 10, 38); + UPDATE_FS_FOR_FR (fs, 11, 39); + UPDATE_FS_FOR_FR (fs, 12, 40); + UPDATE_FS_FOR_FR (fs, 13, 41); + UPDATE_FS_FOR_FR (fs, 14, 42); + UPDATE_FS_FOR_FR (fs, 15, 43); + UPDATE_FS_FOR_FR (fs, 16, 44); + UPDATE_FS_FOR_FR (fs, 17, 45); + UPDATE_FS_FOR_FR (fs, 18, 46); + UPDATE_FS_FOR_FR (fs, 19, 47); + UPDATE_FS_FOR_FR (fs, 20, 48); + UPDATE_FS_FOR_FR (fs, 21, 49); + UPDATE_FS_FOR_FR (fs, 22, 50); + UPDATE_FS_FOR_FR (fs, 23, 51); + UPDATE_FS_FOR_FR (fs, 24, 52); + UPDATE_FS_FOR_FR (fs, 25, 53); + UPDATE_FS_FOR_FR (fs, 26, 54); + UPDATE_FS_FOR_FR (fs, 27, 55); + UPDATE_FS_FOR_FR (fs, 28, 56); + UPDATE_FS_FOR_FR (fs, 29, 57); + UPDATE_FS_FOR_FR 
(fs, 30, 58); + UPDATE_FS_FOR_FR (fs, 31, 59); + + UPDATE_FS_FOR_SAR (fs, 60); + } + else + { + UPDATE_FS_FOR_FR (fs, 4, 32); + UPDATE_FS_FOR_FR (fs, 5, 34); + UPDATE_FS_FOR_FR (fs, 6, 36); + UPDATE_FS_FOR_FR (fs, 7, 38); + UPDATE_FS_FOR_FR (fs, 8, 40); + UPDATE_FS_FOR_FR (fs, 9, 44); + UPDATE_FS_FOR_FR (fs, 10, 44); + UPDATE_FS_FOR_FR (fs, 11, 46); + UPDATE_FS_FOR_FR (fs, 12, 48); + UPDATE_FS_FOR_FR (fs, 13, 50); + UPDATE_FS_FOR_FR (fs, 14, 52); + UPDATE_FS_FOR_FR (fs, 15, 54); + + if (!cpu) + cpu = sysconf (_SC_CPU_VERSION); + + /* PA-RISC 1.0 only has 16 floating point registers. */ + if (cpu != CPU_PA_RISC1_0) + { + UPDATE_FS_FOR_FR (fs, 16, 56); + UPDATE_FS_FOR_FR (fs, 17, 58); + UPDATE_FS_FOR_FR (fs, 18, 60); + UPDATE_FS_FOR_FR (fs, 19, 62); + UPDATE_FS_FOR_FR (fs, 20, 64); + UPDATE_FS_FOR_FR (fs, 21, 66); + UPDATE_FS_FOR_FR (fs, 22, 68); + UPDATE_FS_FOR_FR (fs, 23, 70); + UPDATE_FS_FOR_FR (fs, 24, 72); + UPDATE_FS_FOR_FR (fs, 25, 74); + UPDATE_FS_FOR_FR (fs, 26, 76); + UPDATE_FS_FOR_FR (fs, 27, 78); + UPDATE_FS_FOR_FR (fs, 28, 80); + UPDATE_FS_FOR_FR (fs, 29, 82); + UPDATE_FS_FOR_FR (fs, 30, 84); + UPDATE_FS_FOR_FR (fs, 31, 86); + } + + UPDATE_FS_FOR_SAR (fs, 88); + } + + fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN; + UPDATE_FS_FOR_PC (fs, DWARF_ALT_FRAME_RETURN_COLUMN); + fs->signal_frame = 1; + + return _URC_NO_REASON; + } + + return _URC_END_OF_STACK; +} +#endif /* inhibit_libc */ diff --git a/gcc/config/pa/lib2funcs.asm b/gcc/config/pa/lib2funcs.asm new file mode 100644 index 000000000..8aa398c87 --- /dev/null +++ b/gcc/config/pa/lib2funcs.asm @@ -0,0 +1,74 @@ +; Subroutines for calling unbound dynamic functions from within GDB for HPPA. +; Subroutines for out of line prologues and epilogues on for the HPPA +; Copyright (C) 1994, 1995, 1996, 2009 Free Software Foundation, Inc. + +; This file is part of GCC. + +; GCC is free software; you can redistribute it and/or modify +; it under the terms of the GNU General Public License as published by +; the Free Software Foundation; either version 3, or (at your option) +; any later version. + +; GCC is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. + +; Under Section 7 of GPL version 3, you are granted additional +; permissions described in the GCC Runtime Library Exception, version +; 3.1, as published by the Free Software Foundation. + +; You should have received a copy of the GNU General Public License and +; a copy of the GCC Runtime Library Exception along with this program; +; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +; <http://www.gnu.org/licenses/>. + +#if !defined(__pro__) && !defined(__rtems__) + .SPACE $PRIVATE$ + .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 + .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 + .SPACE $TEXT$ + .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 + .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY + .SUBSPA $MILLICODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=8 +#endif + .IMPORT $$dyncall,MILLICODE +#if !defined(__pro__) && !defined(__rtems__) + .SPACE $TEXT$ + .SUBSPA $CODE$ +#else + .text +#endif + +; Simply call with the address of the desired import stub in %r22 and +; arguments in the normal place (%r26-%r23 and stack slots). +; + .align 4 + .EXPORT __gcc_plt_call,ENTRY,PRIV_LEV=3,RTNVAL=GR +__gcc_plt_call + .PROC + .CALLINFO + .ENTRY + ; Our return address comes in %r31, not %r2! 
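	; Since the return address arrives in %r31, stash it below the frame
	; marker now; after the BLE to the target returns, it is reloaded into
	; %r2 (%rp) so the closing intra-space branch goes back to the caller.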
+ stw %r31,-8(%r30) + + ; An inline version of dyncall so we don't have to worry + ; about long calls to millicode, PIC and other complexities. + bb,>=,n %r22,30,L$foo + depi 0,31,2,%r22 + ldw 4(%r22),%r19 + ldw 0(%r22),%r22 +L$foo + ldsid (%r22),%r1 + mtsp %r1,%sr0 + ble 0(%sr0,%r22) + copy %r31,%r2 + ldw -8(%r30),%r2 + + ; We're going to be returning to a stack address, so we + ; need to do an intra-space return. + ldsid (%rp),%r1 + mtsp %r1,%sr0 + be,n 0(%sr0,%rp) + .EXIT + .PROCEND diff --git a/gcc/config/pa/linux-atomic.c b/gcc/config/pa/linux-atomic.c new file mode 100644 index 000000000..2ae242635 --- /dev/null +++ b/gcc/config/pa/linux-atomic.c @@ -0,0 +1,305 @@ +/* Linux-specific atomic operations for PA Linux. + Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. + Based on code contributed by CodeSourcery for ARM EABI Linux. + Modifications for PA Linux by Helge Deller <deller@gmx.de> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#define EFAULT 14 +#define EBUSY 16 +#define ENOSYS 251 + +/* All PA-RISC implementations supported by linux have strongly + ordered loads and stores. Only cache flushes and purges can be + delayed. The data cache implementations are all globally + coherent. Thus, there is no need to synchonize memory accesses. + + GCC automatically issues a asm memory barrier when it encounters + a __sync_synchronize builtin. Thus, we do not need to define this + builtin. + + We implement byte, short and int versions of each atomic operation + using the kernel helper defined below. There is no support for + 64-bit operations yet. */ + +/* A privileged instruction to crash a userspace program with SIGILL. */ +#define ABORT_INSTRUCTION asm ("iitlbp %r0,(%sr0, %r0)") + +/* Determine kernel LWS function call (0=32-bit, 1=64-bit userspace). */ +#define LWS_CAS (sizeof(unsigned long) == 4 ? 0 : 1) + +/* Kernel helper for compare-and-exchange a 32-bit value. 
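   The exchange goes through the Linux light-weight syscall (LWS) gateway:
   a ble to offset 0xb0 in %sr2, with the LWS index in %r20, performs the
   compare-and-swap in the kernel.  The memory address, old value and new
   value are passed in %r26, %r25 and %r24, and the previous memory
   contents and an error code come back in %r28 and %r21.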
*/ +static inline long +__kernel_cmpxchg (int oldval, int newval, int *mem) +{ + register unsigned long lws_mem asm("r26") = (unsigned long) (mem); + register long lws_ret asm("r28"); + register long lws_errno asm("r21"); + register int lws_old asm("r25") = oldval; + register int lws_new asm("r24") = newval; + asm volatile ( "ble 0xb0(%%sr2, %%r0) \n\t" + "ldi %5, %%r20 \n\t" + : "=r" (lws_ret), "=r" (lws_errno), "=r" (lws_mem), + "=r" (lws_old), "=r" (lws_new) + : "i" (LWS_CAS), "2" (lws_mem), "3" (lws_old), "4" (lws_new) + : "r1", "r20", "r22", "r23", "r29", "r31", "memory" + ); + if (__builtin_expect (lws_errno == -EFAULT || lws_errno == -ENOSYS, 0)) + ABORT_INSTRUCTION; + + /* If the kernel LWS call succeeded (lws_errno == 0), lws_ret contains + the old value from memory. If this value is equal to OLDVAL, the + new value was written to memory. If not, return -EBUSY. */ + if (!lws_errno && lws_ret != oldval) + lws_errno = -EBUSY; + + return lws_errno; +} + +#define HIDDEN __attribute__ ((visibility ("hidden"))) + +/* Big endian masks */ +#define INVERT_MASK_1 24 +#define INVERT_MASK_2 16 + +#define MASK_1 0xffu +#define MASK_2 0xffffu + +#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_fetch_and_##OP##_4 (int *ptr, int val) \ + { \ + int failure, tmp; \ + \ + do { \ + tmp = *ptr; \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return tmp; \ + } + +FETCH_AND_OP_WORD (add, , +) +FETCH_AND_OP_WORD (sub, , -) +FETCH_AND_OP_WORD (or, , |) +FETCH_AND_OP_WORD (and, , &) +FETCH_AND_OP_WORD (xor, , ^) +FETCH_AND_OP_WORD (nand, ~, &) + +#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH +#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH + +/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for + subword-sized quantities. 
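   Each byte or halfword is updated by operating on the aligned word that
   contains it: the word is loaded, the target field is masked out and
   replaced, and __kernel_cmpxchg is retried until the word is stored back
   without interference.  The INVERT_MASK_* values account for the
   big-endian placement of the field within the word.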
*/ + +#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \ + TYPE HIDDEN \ + NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \ + { \ + int *wordptr = (int *) ((unsigned long) ptr & ~3); \ + unsigned int mask, shift, oldval, newval; \ + int failure; \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = ((PFX_OP (((oldval & mask) >> shift) \ + INF_OP (unsigned int) val)) << shift) & mask; \ + newval |= oldval & ~mask; \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (RETURN & mask) >> shift; \ + } + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval) + +#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_##OP##_and_fetch_4 (int *ptr, int val) \ + { \ + int tmp, failure; \ + \ + do { \ + tmp = *ptr; \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return PFX_OP (tmp INF_OP val); \ + } + +OP_AND_FETCH_WORD (add, , +) +OP_AND_FETCH_WORD (sub, , -) +OP_AND_FETCH_WORD (or, , |) +OP_AND_FETCH_WORD (and, , &) +OP_AND_FETCH_WORD (xor, , ^) +OP_AND_FETCH_WORD (nand, ~, &) + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval) + +int HIDDEN +__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int actual_oldval, fail; + + while (1) + { + actual_oldval = *ptr; + + if (__builtin_expect (oldval != actual_oldval, 0)) + return actual_oldval; + + fail = __kernel_cmpxchg (actual_oldval, newval, ptr); + + if (__builtin_expect (!fail, 1)) + return actual_oldval; + } +} + +#define SUBWORD_VAL_CAS(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + int *wordptr = (int *)((unsigned long) ptr & ~3), fail; \ + unsigned int mask, shift, actual_oldval, actual_newval; \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + while (1) \ + { \ + actual_oldval = *wordptr; \ + \ + if (__builtin_expect (((actual_oldval & mask) >> shift) \ + != (unsigned int) oldval, 0)) \ + return (actual_oldval & mask) >> shift; \ + \ + actual_newval = (actual_oldval & ~mask) \ + | (((unsigned int) newval << shift) & mask); \ + \ + fail = __kernel_cmpxchg (actual_oldval, 
actual_newval, \ + wordptr); \ + \ + if (__builtin_expect (!fail, 1)) \ + return (actual_oldval & mask) >> shift; \ + } \ + } + +SUBWORD_VAL_CAS (unsigned short, 2) +SUBWORD_VAL_CAS (unsigned char, 1) + +typedef unsigned char bool; + +bool HIDDEN +__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int failure = __kernel_cmpxchg (oldval, newval, ptr); + return (failure == 0); +} + +#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \ + bool HIDDEN \ + __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + TYPE actual_oldval \ + = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \ + return (oldval == actual_oldval); \ + } + +SUBWORD_BOOL_CAS (unsigned short, 2) +SUBWORD_BOOL_CAS (unsigned char, 1) + +int HIDDEN +__sync_lock_test_and_set_4 (int *ptr, int val) +{ + int failure, oldval; + + do { + oldval = *ptr; + failure = __kernel_cmpxchg (oldval, val, ptr); + } while (failure != 0); + + return oldval; +} + +#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \ + { \ + int failure; \ + unsigned int oldval, newval, shift, mask; \ + int *wordptr = (int *) ((unsigned long) ptr & ~3); \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = (oldval & ~mask) \ + | (((unsigned int) val << shift) & mask); \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (oldval & mask) >> shift; \ + } + +SUBWORD_TEST_AND_SET (unsigned short, 2) +SUBWORD_TEST_AND_SET (unsigned char, 1) + +#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \ + void HIDDEN \ + __sync_lock_release_##WIDTH (TYPE *ptr) \ + { \ + *ptr = 0; \ + } + +SYNC_LOCK_RELEASE (int, 4) +SYNC_LOCK_RELEASE (short, 2) +SYNC_LOCK_RELEASE (char, 1) diff --git a/gcc/config/pa/linux-unwind.h b/gcc/config/pa/linux-unwind.h new file mode 100644 index 000000000..38b4eda7a --- /dev/null +++ b/gcc/config/pa/linux-unwind.h @@ -0,0 +1,141 @@ +/* DWARF2 EH unwinding support for PA Linux. + Copyright (C) 2004, 2005, 2009, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +/* Don't use this if inhibit_libc is set. + The build for this target will fail trying to include missing headers. */ +#ifndef inhibit_libc +#include <signal.h> +#include <sys/ucontext.h> + +/* Unfortunately, because of various bugs and changes to the kernel, + we have several cases to deal with. 
+ + In 2.4, the signal trampoline is 4 words, and (CONTEXT)->ra should + point directly at the beginning of the trampoline and struct rt_sigframe. + + In <= 2.6.5-rc2-pa3, the signal trampoline is 9 words, and + (CONTEXT)->ra points at the 4th word in the trampoline structure. This + is wrong, it should point at the 5th word. This is fixed in 2.6.5-rc2-pa4. + + To detect these cases, we first take (CONTEXT)->ra, align it to 64-bytes + to get the beginning of the signal frame, and then check offsets 0, 4 + and 5 to see if we found the beginning of the trampoline. This will + tell us how to locate the sigcontext structure. + + Note that with a 2.4 64-bit kernel, the signal context is not properly + passed back to userspace so the unwind will not work correctly. */ + +#define MD_FALLBACK_FRAME_STATE_FOR pa32_fallback_frame_state + +static _Unwind_Reason_Code +pa32_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned long sp = (unsigned long)context->ra & ~63; + unsigned int *pc = (unsigned int *)sp; + unsigned long off; + _Unwind_Ptr new_cfa; + int i; + struct sigcontext *sc; + struct rt_sigframe { + siginfo_t info; + struct ucontext uc; + } *frame; + + /* rt_sigreturn trampoline: + 3419000x ldi 0, %r25 or ldi 1, %r25 (x = 0 or 2) + 3414015a ldi __NR_rt_sigreturn, %r20 + e4008200 be,l 0x100(%sr2, %r0), %sr0, %r31 + 08000240 nop */ + + if (pc[0] == 0x34190000 || pc[0] == 0x34190002) + off = 4*4; + else if (pc[4] == 0x34190000 || pc[4] == 0x34190002) + { + pc += 4; + off = 10 * 4; + } + else if (pc[5] == 0x34190000 || pc[5] == 0x34190002) + { + pc += 5; + off = 10 * 4; + } + else + { + /* We may have to unwind through an alternate signal stack. + We assume that the alignment of the alternate signal stack + is BIGGEST_ALIGNMENT (i.e., that it has been allocated using + malloc). As a result, we can't distinguish trampolines + used prior to 2.6.5-rc2-pa4. However after 2.6.5-rc2-pa4, + the return address of a signal trampoline will be on an odd + word boundary and we can then determine the frame offset. 
*/ + sp = (unsigned long)context->ra; + pc = (unsigned int *)sp; + if ((pc[0] == 0x34190000 || pc[0] == 0x34190002) && (sp & 4)) + off = 5 * 4; + else + return _URC_END_OF_STACK; + } + + if (pc[1] != 0x3414015a + || pc[2] != 0xe4008200 + || pc[3] != 0x08000240) + return _URC_END_OF_STACK; + + frame = (struct rt_sigframe *)(sp + off); + sc = &frame->uc.uc_mcontext; + + new_cfa = sc->sc_gr[30]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + for (i = 1; i <= 31; i++) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset = (long)&sc->sc_gr[i] - new_cfa; + } + for (i = 4; i <= 31; i++) + { + /* FP regs have left and right halves */ + fs->regs.reg[2*i+24].how = REG_SAVED_OFFSET; + fs->regs.reg[2*i+24].loc.offset + = (long)&sc->sc_fr[i] - new_cfa; + fs->regs.reg[2*i+24+1].how = REG_SAVED_OFFSET; + fs->regs.reg[2*i+24+1].loc.offset + = (long)&sc->sc_fr[i] + 4 - new_cfa; + } + fs->regs.reg[88].how = REG_SAVED_OFFSET; + fs->regs.reg[88].loc.offset = (long) &sc->sc_sar - new_cfa; + fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].how = REG_SAVED_OFFSET; + fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].loc.offset + = (long) &sc->sc_iaoq[0] - new_cfa; + fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN; + fs->signal_frame = 1; + return _URC_NO_REASON; +} +#endif /* inhibit_libc */ diff --git a/gcc/config/pa/milli64.S b/gcc/config/pa/milli64.S new file mode 100644 index 000000000..2e9c4f741 --- /dev/null +++ b/gcc/config/pa/milli64.S @@ -0,0 +1,2134 @@ +/* 32 and 64-bit millicode, original author Hewlett-Packard + adapted for gcc by Paul Bame <bame@debian.org> + and Alan Modra <alan@linuxcare.com.au>. + + Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifdef pa64 + .level 2.0w +#endif + +/* Hardware General Registers. */ +r0: .reg %r0 +r1: .reg %r1 +r2: .reg %r2 +r3: .reg %r3 +r4: .reg %r4 +r5: .reg %r5 +r6: .reg %r6 +r7: .reg %r7 +r8: .reg %r8 +r9: .reg %r9 +r10: .reg %r10 +r11: .reg %r11 +r12: .reg %r12 +r13: .reg %r13 +r14: .reg %r14 +r15: .reg %r15 +r16: .reg %r16 +r17: .reg %r17 +r18: .reg %r18 +r19: .reg %r19 +r20: .reg %r20 +r21: .reg %r21 +r22: .reg %r22 +r23: .reg %r23 +r24: .reg %r24 +r25: .reg %r25 +r26: .reg %r26 +r27: .reg %r27 +r28: .reg %r28 +r29: .reg %r29 +r30: .reg %r30 +r31: .reg %r31 + +/* Hardware Space Registers. */ +sr0: .reg %sr0 +sr1: .reg %sr1 +sr2: .reg %sr2 +sr3: .reg %sr3 +sr4: .reg %sr4 +sr5: .reg %sr5 +sr6: .reg %sr6 +sr7: .reg %sr7 + +/* Hardware Floating Point Registers. 
*/ +fr0: .reg %fr0 +fr1: .reg %fr1 +fr2: .reg %fr2 +fr3: .reg %fr3 +fr4: .reg %fr4 +fr5: .reg %fr5 +fr6: .reg %fr6 +fr7: .reg %fr7 +fr8: .reg %fr8 +fr9: .reg %fr9 +fr10: .reg %fr10 +fr11: .reg %fr11 +fr12: .reg %fr12 +fr13: .reg %fr13 +fr14: .reg %fr14 +fr15: .reg %fr15 + +/* Hardware Control Registers. */ +cr11: .reg %cr11 +sar: .reg %cr11 /* Shift Amount Register */ + +/* Software Architecture General Registers. */ +rp: .reg r2 /* return pointer */ +#ifdef pa64 +mrp: .reg r2 /* millicode return pointer */ +#else +mrp: .reg r31 /* millicode return pointer */ +#endif +ret0: .reg r28 /* return value */ +ret1: .reg r29 /* return value (high part of double) */ +sp: .reg r30 /* stack pointer */ +dp: .reg r27 /* data pointer */ +arg0: .reg r26 /* argument */ +arg1: .reg r25 /* argument or high part of double argument */ +arg2: .reg r24 /* argument */ +arg3: .reg r23 /* argument or high part of double argument */ + +/* Software Architecture Space Registers. */ +/* sr0 ; return link from BLE */ +sret: .reg sr1 /* return value */ +sarg: .reg sr1 /* argument */ +/* sr4 ; PC SPACE tracker */ +/* sr5 ; process private data */ + +/* Frame Offsets (millicode convention!) Used when calling other + millicode routines. Stack unwinding is dependent upon these + definitions. */ +r31_slot: .equ -20 /* "current RP" slot */ +sr0_slot: .equ -16 /* "static link" slot */ +#if defined(pa64) +mrp_slot: .equ -16 /* "current RP" slot */ +psp_slot: .equ -8 /* "previous SP" slot */ +#else +mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */ +#endif + + +#define DEFINE(name,value)name: .EQU value +#define RDEFINE(name,value)name: .REG value +#ifdef milliext +#define MILLI_BE(lbl) BE lbl(sr7,r0) +#define MILLI_BEN(lbl) BE,n lbl(sr7,r0) +#define MILLI_BLE(lbl) BLE lbl(sr7,r0) +#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0) +#define MILLIRETN BE,n 0(sr0,mrp) +#define MILLIRET BE 0(sr0,mrp) +#define MILLI_RETN BE,n 0(sr0,mrp) +#define MILLI_RET BE 0(sr0,mrp) +#else +#define MILLI_BE(lbl) B lbl +#define MILLI_BEN(lbl) B,n lbl +#define MILLI_BLE(lbl) BL lbl,mrp +#define MILLI_BLEN(lbl) BL,n lbl,mrp +#define MILLIRETN BV,n 0(mrp) +#define MILLIRET BV 0(mrp) +#define MILLI_RETN BV,n 0(mrp) +#define MILLI_RET BV 0(mrp) +#endif + +#ifdef __STDC__ +#define CAT(a,b) a##b +#else +#define CAT(a,b) a/**/b +#endif + +#ifdef ELF +#define SUBSPA_MILLI .section .text +#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16 +#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16 +#define ATTR_MILLI +#define SUBSPA_DATA .section .data +#define ATTR_DATA +#define GLOBAL $global$ +#define GSYM(sym) !sym: +#define LSYM(sym) !CAT(.L,sym:) +#define LREF(sym) CAT(.L,sym) + +#else + +#ifdef coff +/* This used to be .milli but since link32 places different named + sections in different segments millicode ends up a long ways away + from .text (1meg?). This way they will be a lot closer. + + The SUBSPA_MILLI_* specify locality sets for certain millicode + modules in order to ensure that modules that call one another are + placed close together. Without locality sets this is unlikely to + happen because of the Dynamite linker library search algorithm. We + want these modules close together so that short calls always reach + (we don't want to require long calls or use long call stubs). 
*/ + +#define SUBSPA_MILLI .subspa .text +#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16 +#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16 +#define ATTR_MILLI .attr code,read,execute +#define SUBSPA_DATA .subspa .data +#define ATTR_DATA .attr init_data,read,write +#define GLOBAL _gp +#else +#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8 +#define SUBSPA_MILLI_DIV SUBSPA_MILLI +#define SUBSPA_MILLI_MUL SUBSPA_MILLI +#define ATTR_MILLI +#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero +#define ATTR_DATA +#define GLOBAL $global$ +#endif +#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16 + +#define GSYM(sym) !sym +#define LSYM(sym) !CAT(L$,sym) +#define LREF(sym) CAT(L$,sym) +#endif + +#ifdef L_dyncall + SUBSPA_MILLI + ATTR_DATA +GSYM($$dyncall) + .export $$dyncall,millicode + .proc + .callinfo millicode + .entry + bb,>=,n %r22,30,LREF(1) ; branch if not plabel address + depi 0,31,2,%r22 ; clear the two least significant bits + ldw 4(%r22),%r19 ; load new LTP value + ldw 0(%r22),%r22 ; load address of target +LSYM(1) +#ifdef LINUX + bv %r0(%r22) ; branch to the real target +#else + ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22 + mtsp %r1,%sr0 ; move that space identifier into sr0 + be 0(%sr0,%r22) ; branch to the real target +#endif + stw %r2,-24(%r30) ; save return address into frame marker + .exit + .procend +#endif + +#ifdef L_divI +/* ROUTINES: $$divI, $$divoI + + Single precision divide for signed binary integers. + + The quotient is truncated towards zero. + The sign of the quotient is the XOR of the signs of the dividend and + divisor. + Divide by zero is trapped. + Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI. + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: + . divisor is zero (traps with ADDIT,= 0,25,0) + . dividend==-2**31 and divisor==-1 and routine is $$divoI + . (traps with ADDO 26,25,0) + . Changes memory at the following places: + . NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Branchs to other millicode routines using BE + . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15 + . + . For selected divisors, calls a divide by constant routine written by + . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13. + . + . The only overflow case is -2**31 divided by -1. + . Both routines return -2**31 but only $$divoI traps. */ + +RDEFINE(temp,r1) +RDEFINE(retreg,ret1) /* r29 */ +RDEFINE(temp1,arg0) + SUBSPA_MILLI_DIV + ATTR_MILLI + .import $$divI_2,millicode + .import $$divI_3,millicode + .import $$divI_4,millicode + .import $$divI_5,millicode + .import $$divI_6,millicode + .import $$divI_7,millicode + .import $$divI_8,millicode + .import $$divI_9,millicode + .import $$divI_10,millicode + .import $$divI_12,millicode + .import $$divI_14,millicode + .import $$divI_15,millicode + .export $$divI,millicode + .export $$divoI,millicode + .proc + .callinfo millicode + .entry +GSYM($$divoI) + comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */ +GSYM($$divI) + ldo -1(arg1),temp /* is there at most one bit set ? 
*/ + and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */ + addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */ + b,n LREF(neg_denom) +LSYM(pow2) + addi,>= 0,arg0,retreg /* if numerator is negative, add the */ + add arg0,temp,retreg /* (denominaotr -1) to correct for shifts */ + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ + extrs retreg,15,16,retreg /* retreg = retreg >> 16 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ + ldi 0xcc,temp1 /* setup 0xcc in temp1 */ + extru,= arg1,23,8,temp /* test denominator with 0xff00 */ + extrs retreg,23,24,retreg /* retreg = retreg >> 8 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ + ldi 0xaa,temp /* setup 0xaa in temp */ + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ + extrs retreg,27,28,retreg /* retreg = retreg >> 4 */ + and,= arg1,temp1,r0 /* test denominator with 0xcc */ + extrs retreg,29,30,retreg /* retreg = retreg >> 2 */ + and,= arg1,temp,r0 /* test denominator with 0xaa */ + extrs retreg,30,31,retreg /* retreg = retreg >> 1 */ + MILLIRETN +LSYM(neg_denom) + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */ + b,n LREF(regular_seq) + sub r0,arg1,temp /* make denominator positive */ + comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */ + ldo -1(temp),retreg /* is there at most one bit set ? */ + and,= temp,retreg,r0 /* if so, the denominator is power of 2 */ + b,n LREF(regular_seq) + sub r0,arg0,retreg /* negate numerator */ + comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */ + copy retreg,arg0 /* set up arg0, arg1 and temp */ + copy temp,arg1 /* before branching to pow2 */ + b LREF(pow2) + ldo -1(arg1),temp +LSYM(regular_seq) + comib,>>=,n 15,arg1,LREF(small_divisor) + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ +LSYM(normal) + subi 0,retreg,retreg /* make it positive */ + sub 0,arg1,temp /* clear carry, */ + /* negate the divisor */ + ds 0,temp,0 /* set V-bit to the comple- */ + /* ment of the divisor sign */ + add retreg,retreg,retreg /* shift msb bit into carry */ + ds r0,arg1,temp /* 1st divide step, if no carry */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ 
+ ds temp,arg1,temp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 27th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 32nd divide step, */ + addc retreg,retreg,retreg /* shift last retreg bit into retreg */ + xor,>= arg0,arg1,0 /* get correct sign of quotient */ + sub 0,retreg,retreg /* based on operand signs */ + MILLIRETN + nop + +LSYM(small_divisor) + +#if defined(pa64) +/* Clear the upper 32 bits of the arg1 register. We are working with */ +/* small divisors (and 32-bit integers) We must not be mislead */ +/* by "1" bits left in the upper 32 bits. */ + depd %r0,31,32,%r25 +#endif + blr,n arg1,r0 + nop +/* table for divisor == 0,1, ... ,15 */ + addit,= 0,arg1,r0 /* trap if divisor == 0 */ + nop + MILLIRET /* divisor == 1 */ + copy arg0,retreg + MILLI_BEN($$divI_2) /* divisor == 2 */ + nop + MILLI_BEN($$divI_3) /* divisor == 3 */ + nop + MILLI_BEN($$divI_4) /* divisor == 4 */ + nop + MILLI_BEN($$divI_5) /* divisor == 5 */ + nop + MILLI_BEN($$divI_6) /* divisor == 6 */ + nop + MILLI_BEN($$divI_7) /* divisor == 7 */ + nop + MILLI_BEN($$divI_8) /* divisor == 8 */ + nop + MILLI_BEN($$divI_9) /* divisor == 9 */ + nop + MILLI_BEN($$divI_10) /* divisor == 10 */ + nop + b LREF(normal) /* divisor == 11 */ + add,>= 0,arg0,retreg + MILLI_BEN($$divI_12) /* divisor == 12 */ + nop + b LREF(normal) /* divisor == 13 */ + add,>= 0,arg0,retreg + MILLI_BEN($$divI_14) /* divisor == 14 */ + nop + MILLI_BEN($$divI_15) /* divisor == 15 */ + nop + +LSYM(negative1) + sub 0,arg0,retreg /* result is negation of dividend */ + MILLIRET + addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */ + .exit + .procend + .end +#endif + +#ifdef L_divU +/* ROUTINE: $$divU + . + . Single precision divide for unsigned integers. + . + . Quotient is truncated towards zero. + . Traps on divide by zero. + + INPUT REGISTERS: + . 
arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: + . divisor is zero + . Changes memory at the following places: + . NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Branchs to other millicode routines using BE: + . $$divU_# for 3,5,6,7,9,10,12,14,15 + . + . For selected small divisors calls the special divide by constant + . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */ + +RDEFINE(temp,r1) +RDEFINE(retreg,ret1) /* r29 */ +RDEFINE(temp1,arg0) + SUBSPA_MILLI_DIV + ATTR_MILLI + .export $$divU,millicode + .import $$divU_3,millicode + .import $$divU_5,millicode + .import $$divU_6,millicode + .import $$divU_7,millicode + .import $$divU_9,millicode + .import $$divU_10,millicode + .import $$divU_12,millicode + .import $$divU_14,millicode + .import $$divU_15,millicode + .proc + .callinfo millicode + .entry +GSYM($$divU) +/* The subtract is not nullified since it does no harm and can be used + by the two cases that branch back to "normal". */ + ldo -1(arg1),temp /* is there at most one bit set ? */ + and,= arg1,temp,r0 /* if so, denominator is power of 2 */ + b LREF(regular_seq) + addit,= 0,arg1,0 /* trap for zero dvr */ + copy arg0,retreg + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ + extru retreg,15,16,retreg /* retreg = retreg >> 16 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ + ldi 0xcc,temp1 /* setup 0xcc in temp1 */ + extru,= arg1,23,8,temp /* test denominator with 0xff00 */ + extru retreg,23,24,retreg /* retreg = retreg >> 8 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ + ldi 0xaa,temp /* setup 0xaa in temp */ + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ + extru retreg,27,28,retreg /* retreg = retreg >> 4 */ + and,= arg1,temp1,r0 /* test denominator with 0xcc */ + extru retreg,29,30,retreg /* retreg = retreg >> 2 */ + and,= arg1,temp,r0 /* test denominator with 0xaa */ + extru retreg,30,31,retreg /* retreg = retreg >> 1 */ + MILLIRETN + nop +LSYM(regular_seq) + comib,>= 15,arg1,LREF(special_divisor) + subi 0,arg1,temp /* clear carry, negate the divisor */ + ds r0,temp,r0 /* set V-bit to 1 */ +LSYM(normal) + add arg0,arg0,retreg /* shift msb bit into carry */ + ds r0,arg1,temp /* 1st divide step, if no carry */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds 
temp,arg1,temp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 27th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 32nd divide step, */ + MILLIRET + addc retreg,retreg,retreg /* shift last retreg bit into retreg */ + +/* Handle the cases where divisor is a small constant or has high bit on. */ +LSYM(special_divisor) +/* blr arg1,r0 */ +/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */ + +/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from + generating such a blr, comib sequence. A problem in nullification. So I + rewrote this code. */ + +#if defined(pa64) +/* Clear the upper 32 bits of the arg1 register. We are working with + small divisors (and 32-bit unsigned integers) We must not be mislead + by "1" bits left in the upper 32 bits. 
*/ + depd %r0,31,32,%r25 +#endif + comib,> 0,arg1,LREF(big_divisor) + nop + blr arg1,r0 + nop + +LSYM(zero_divisor) /* this label is here to provide external visibility */ + addit,= 0,arg1,0 /* trap for zero dvr */ + nop + MILLIRET /* divisor == 1 */ + copy arg0,retreg + MILLIRET /* divisor == 2 */ + extru arg0,30,31,retreg + MILLI_BEN($$divU_3) /* divisor == 3 */ + nop + MILLIRET /* divisor == 4 */ + extru arg0,29,30,retreg + MILLI_BEN($$divU_5) /* divisor == 5 */ + nop + MILLI_BEN($$divU_6) /* divisor == 6 */ + nop + MILLI_BEN($$divU_7) /* divisor == 7 */ + nop + MILLIRET /* divisor == 8 */ + extru arg0,28,29,retreg + MILLI_BEN($$divU_9) /* divisor == 9 */ + nop + MILLI_BEN($$divU_10) /* divisor == 10 */ + nop + b LREF(normal) /* divisor == 11 */ + ds r0,temp,r0 /* set V-bit to 1 */ + MILLI_BEN($$divU_12) /* divisor == 12 */ + nop + b LREF(normal) /* divisor == 13 */ + ds r0,temp,r0 /* set V-bit to 1 */ + MILLI_BEN($$divU_14) /* divisor == 14 */ + nop + MILLI_BEN($$divU_15) /* divisor == 15 */ + nop + +/* Handle the case where the high bit is on in the divisor. + Compute: if( dividend>=divisor) quotient=1; else quotient=0; + Note: dividend>==divisor iff dividend-divisor does not borrow + and not borrow iff carry. */ +LSYM(big_divisor) + sub arg0,arg1,r0 + MILLIRET + addc r0,r0,retreg + .exit + .procend + .end +#endif + +#ifdef L_remI +/* ROUTINE: $$remI + + DESCRIPTION: + . $$remI returns the remainder of the division of two signed 32-bit + . integers. The sign of the remainder is the same as the sign of + . the dividend. + + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = destroyed + . arg1 = destroyed + . ret1 = remainder + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: DIVIDE BY ZERO + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable + . Does not create a stack frame + . Is usable for internal or external microcode + + DISCUSSION: + . Calls other millicode routines via mrp: NONE + . Calls other millicode routines: NONE */ + +RDEFINE(tmp,r1) +RDEFINE(retreg,ret1) + + SUBSPA_MILLI + ATTR_MILLI + .proc + .callinfo millicode + .entry +GSYM($$remI) +GSYM($$remoI) + .export $$remI,MILLICODE + .export $$remoI,MILLICODE + ldo -1(arg1),tmp /* is there at most one bit set ? */ + and,<> arg1,tmp,r0 /* if not, don't use power of 2 */ + addi,> 0,arg1,r0 /* if denominator > 0, use power */ + /* of 2 */ + b,n LREF(neg_denom) +LSYM(pow2) + comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */ + and arg0,tmp,retreg /* get the result */ + MILLIRETN +LSYM(neg_num) + subi 0,arg0,arg0 /* negate numerator */ + and arg0,tmp,retreg /* get the result */ + subi 0,retreg,retreg /* negate result */ + MILLIRETN +LSYM(neg_denom) + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */ + /* of 2 */ + b,n LREF(regular_seq) + sub r0,arg1,tmp /* make denominator positive */ + comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */ + ldo -1(tmp),retreg /* is there at most one bit set ? 
*/ + and,= tmp,retreg,r0 /* if not, go to regular_seq */ + b,n LREF(regular_seq) + comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */ + and arg0,retreg,retreg + MILLIRETN +LSYM(neg_num_2) + subi 0,arg0,tmp /* test against 0x80000000 */ + and tmp,retreg,retreg + subi 0,retreg,retreg + MILLIRETN +LSYM(regular_seq) + addit,= 0,arg1,0 /* trap if div by zero */ + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ + sub 0,retreg,retreg /* make it positive */ + sub 0,arg1, tmp /* clear carry, */ + /* negate the divisor */ + ds 0, tmp,0 /* set V-bit to the comple- */ + /* ment of the divisor sign */ + or 0,0, tmp /* clear tmp */ + add retreg,retreg,retreg /* shift msb bit into carry */ + ds tmp,arg1, tmp /* 1st divide step, if no carry */ + /* out, msb of quotient = 0 */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ +LSYM(t1) + ds tmp,arg1, tmp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 27th divide step */ + addc retreg,retreg,retreg /* 
shift retreg with/into carry */ + ds tmp,arg1, tmp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 32nd divide step, */ + addc retreg,retreg,retreg /* shift last bit into retreg */ + movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */ + add,< arg1,0,0 /* if arg1 > 0, add arg1 */ + add,tr tmp,arg1,retreg /* for correcting remainder tmp */ + sub tmp,arg1,retreg /* else add absolute value arg1 */ +LSYM(finish) + add,>= arg0,0,0 /* set sign of remainder */ + sub 0,retreg,retreg /* to sign of dividend */ + MILLIRET + nop + .exit + .procend +#ifdef milliext + .origin 0x00000200 +#endif + .end +#endif + +#ifdef L_remU +/* ROUTINE: $$remU + . Single precision divide for remainder with unsigned binary integers. + . + . The remainder must be dividend-(dividend/divisor)*divisor. + . Divide by zero is trapped. + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = remainder + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: DIVIDE BY ZERO + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Calls other millicode routines using mrp: NONE + . Calls other millicode routines: NONE */ + + +RDEFINE(temp,r1) +RDEFINE(rmndr,ret1) /* r29 */ + SUBSPA_MILLI + ATTR_MILLI + .export $$remU,millicode + .proc + .callinfo millicode + .entry +GSYM($$remU) + ldo -1(arg1),temp /* is there at most one bit set ? 
*/ + and,= arg1,temp,r0 /* if not, don't use power of 2 */ + b LREF(regular_seq) + addit,= 0,arg1,r0 /* trap on div by zero */ + and arg0,temp,rmndr /* get the result for power of 2 */ + MILLIRETN +LSYM(regular_seq) + comib,>=,n 0,arg1,LREF(special_case) + subi 0,arg1,rmndr /* clear carry, negate the divisor */ + ds r0,rmndr,r0 /* set V-bit to 1 */ + add arg0,arg0,temp /* shift msb bit into carry */ + ds r0,arg1,rmndr /* 1st divide step, if no carry */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 2nd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 3rd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 4th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 5th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 6th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 7th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 8th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 9th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 10th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 11th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 12th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 13th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 14th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 15th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 16th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 17th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 18th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 19th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 20th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 21st divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 22nd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 23rd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 24th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 25th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 26th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 27th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 28th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 29th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 30th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 31st divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 32nd divide step, */ + 
comiclr,<= 0,rmndr,r0 + add rmndr,arg1,rmndr /* correction */ + MILLIRETN + nop + +/* Putting >= on the last DS and deleting COMICLR does not work! */ +LSYM(special_case) + sub,>>= arg0,arg1,rmndr + copy arg0,rmndr + MILLIRETN + nop + .exit + .procend + .end +#endif + +#ifdef L_div_const +/* ROUTINE: $$divI_2 + . $$divI_3 $$divU_3 + . $$divI_4 + . $$divI_5 $$divU_5 + . $$divI_6 $$divU_6 + . $$divI_7 $$divU_7 + . $$divI_8 + . $$divI_9 $$divU_9 + . $$divI_10 $$divU_10 + . + . $$divI_12 $$divU_12 + . + . $$divI_14 $$divU_14 + . $$divI_15 $$divU_15 + . $$divI_16 + . $$divI_17 $$divU_17 + . + . Divide by selected constants for single precision binary integers. + + INPUT REGISTERS: + . arg0 == dividend + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: NONE + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Calls other millicode routines using mrp: NONE + . Calls other millicode routines: NONE */ + + +/* TRUNCATED DIVISION BY SMALL INTEGERS + + We are interested in q(x) = floor(x/y), where x >= 0 and y > 0 + (with y fixed). + + Let a = floor(z/y), for some choice of z. Note that z will be + chosen so that division by z is cheap. + + Let r be the remainder(z/y). In other words, r = z - ay. + + Now, our method is to choose a value for b such that + + q'(x) = floor((ax+b)/z) + + is equal to q(x) over as large a range of x as possible. If the + two are equal over a sufficiently large range, and if it is easy to + form the product (ax), and it is easy to divide by z, then we can + perform the division much faster than the general division algorithm. + + So, we want the following to be true: + + . For x in the following range: + . + . ky <= x < (k+1)y + . + . implies that + . + . k <= (ax+b)/z < (k+1) + + We want to determine b such that this is true for all k in the + range {0..K} for some maximum K. + + Since (ax+b) is an increasing function of x, we can take each + bound separately to determine the "best" value for b. + + (ax+b)/z < (k+1) implies + + (a((k+1)y-1)+b < (k+1)z implies + + b < a + (k+1)(z-ay) implies + + b < a + (k+1)r + + This needs to be true for all k in the range {0..K}. In + particular, it is true for k = 0 and this leads to a maximum + acceptable value for b. + + b < a+r or b <= a+r-1 + + Taking the other bound, we have + + k <= (ax+b)/z implies + + k <= (aky+b)/z implies + + k(z-ay) <= b implies + + kr <= b + + Clearly, the largest range for k will be achieved by maximizing b, + when r is not zero. When r is zero, then the simplest choice for b + is 0. When r is not 0, set + + . b = a+r-1 + + Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y) + for all x in the range: + + . 0 <= x < (K+1)y + + We need to determine what K is. Of our two bounds, + + . b < a+(k+1)r is satisfied for all k >= 0, by construction. + + The other bound is + + . kr <= b + + This is always true if r = 0. If r is not 0 (the usual case), then + K = floor((a+r-1)/r), is the maximum value for k. 
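+
+   As a concrete check of this construction (using the entries that
+   appear in the table further below), take y = 3 and z = 2**32.  Then
+
+   .  a = floor(z/y)        = 55555555 (hex)
+   .  r = z - ay            = 1
+   .  b = a+r-1             = a
+   .  K = floor((a+r-1)/r)  = a
+
+   so (K+1)y-1 = 100000001 (hex), large enough to cover every 32-bit x.
+   Since b = a here, (ax+b) is just a(x+1); for x = 100 (decimal),
+   floor(a(x+1)/2**32) = 33, which is indeed floor(100/3).
+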
+ + Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct + answer for q(x) = floor(x/y) when x is in the range + + (0,(K+1)y-1) K = floor((a+r-1)/r) + + To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that + the formula for q'(x) yields the correct value of q(x) for all x + representable by a single word in HPPA. + + We are also constrained in that computing the product (ax), adding + b, and dividing by z must all be done quickly, otherwise we will be + better off going through the general algorithm using the DS + instruction, which uses approximately 70 cycles. + + For each y, there is a choice of z which satisfies the constraints + for (K+1)y >= 2**32. We may not, however, be able to satisfy the + timing constraints for arbitrary y. It seems that z being equal to + a power of 2 or a power of 2 minus 1 is as good as we can do, since + it minimizes the time to do division by z. We want the choice of z + to also result in a value for (a) that minimizes the computation of + the product (ax). This is best achieved if (a) has a regular bit + pattern (so the multiplication can be done with shifts and adds). + The value of (a) also needs to be less than 2**32 so the product is + always guaranteed to fit in 2 words. + + In actual practice, the following should be done: + + 1) For negative x, you should take the absolute value and remember + . the fact so that the result can be negated. This obviously does + . not apply in the unsigned case. + 2) For even y, you should factor out the power of 2 that divides y + . and divide x by it. You can then proceed by dividing by the + . odd factor of y. + + Here is a table of some odd values of y, and corresponding choices + for z which are "good". + + y z r a (hex) max x (hex) + + 3 2**32 1 55555555 100000001 + 5 2**32 1 33333333 100000003 + 7 2**24-1 0 249249 (infinite) + 9 2**24-1 0 1c71c7 (infinite) + 11 2**20-1 0 1745d (infinite) + 13 2**24-1 0 13b13b (infinite) + 15 2**32 1 11111111 10000000d + 17 2**32 1 f0f0f0f 10000000f + + If r is 1, then b = a+r-1 = a. This simplifies the computation + of (ax+b), since you can compute (x+1)(a) instead. If r is 0, + then b = 0 is ok to use which simplifies (ax+b). + + The bit patterns for 55555555, 33333333, and 11111111 are obviously + very regular. The bit patterns for the other values of a above are: + + y (hex) (binary) + + 7 249249 001001001001001001001001 << regular >> + 9 1c71c7 000111000111000111000111 << regular >> + 11 1745d 000000010111010001011101 << irregular >> + 13 13b13b 000100111011000100111011 << irregular >> + + The bit patterns for (a) corresponding to (y) of 11 and 13 may be + too irregular to warrant using this method. + + When z is a power of 2 minus 1, then the division by z is slightly + more complicated, involving an iterative solution. + + The code presented here solves division by 1 through 17, except for + 11 and 13. There are algorithms for both signed and unsigned + quantities given. + + TIMINGS (cycles) + + divisor positive negative unsigned + + . 1 2 2 2 + . 2 4 4 2 + . 3 19 21 19 + . 4 4 4 2 + . 5 18 22 19 + . 6 19 22 19 + . 8 4 4 2 + . 10 18 19 17 + . 12 18 20 18 + . 15 16 18 16 + . 16 4 4 2 + . 17 16 18 16 + + Now, the algorithm for 7, 9, and 14 is an iterative one. That is, + a loop body is executed until the tentative quotient is 0. The + number of times the loop body is executed varies depending on the + dividend, but is never more than two times. If the dividend is + less than the divisor, then the loop body is not executed at all. 
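+
+   A rough C rendering of this iterative scheme, for the unsigned divide
+   by 7 (the constant 249249 (hex) is the value of (a) from the table
+   above; the function name and the 64-bit intermediate are conveniences
+   of the sketch, not part of the millicode):
+
+   .  unsigned div7 (unsigned x)
+   .  {
+   .    // multiply (x+1) by a; b = 0 here, and the +1 supplies the
+   .    // exact-multiple correction described below for the FFFFFF divisors
+   .    unsigned long long v = 0x249249ULL * ((unsigned long long) x + 1);
+   .    unsigned q = 0, t;
+   .    while ((t = (unsigned) (v >> 24)) != 0)  // tentative quotient
+   .      {
+   .        q += t;                   // accumulate tentative quotients
+   .        v = t + (v & 0xFFFFFF);   // new remainder to divide again
+   .      }
+   .    return q;
+   .  }
+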
+ Each iteration adds 4 cycles to the timings. + + divisor positive negative unsigned + + . 7 19+4n 20+4n 20+4n n = number of iterations + . 9 21+4n 22+4n 21+4n + . 14 21+4n 22+4n 20+4n + + To give an idea of how the number of iterations varies, here is a + table of dividend versus number of iterations when dividing by 7. + + smallest largest required + dividend dividend iterations + + . 0 6 0 + . 7 0x6ffffff 1 + 0x1000006 0xffffffff 2 + + There is some overlap in the range of numbers requiring 1 and 2 + iterations. */ + +RDEFINE(t2,r1) +RDEFINE(x2,arg0) /* r26 */ +RDEFINE(t1,arg1) /* r25 */ +RDEFINE(x1,ret1) /* r29 */ + + SUBSPA_MILLI_DIV + ATTR_MILLI + + .proc + .callinfo millicode + .entry +/* NONE of these routines require a stack frame + ALL of these routines are unwindable from millicode */ + +GSYM($$divide_by_constant) + .export $$divide_by_constant,millicode +/* Provides a "nice" label for the code covered by the unwind descriptor + for things like gprof. */ + +/* DIVISION BY 2 (shift by 1) */ +GSYM($$divI_2) + .export $$divI_2,millicode + comclr,>= arg0,0,0 + addi 1,arg0,arg0 + MILLIRET + extrs arg0,30,31,ret1 + + +/* DIVISION BY 4 (shift by 2) */ +GSYM($$divI_4) + .export $$divI_4,millicode + comclr,>= arg0,0,0 + addi 3,arg0,arg0 + MILLIRET + extrs arg0,29,30,ret1 + + +/* DIVISION BY 8 (shift by 3) */ +GSYM($$divI_8) + .export $$divI_8,millicode + comclr,>= arg0,0,0 + addi 7,arg0,arg0 + MILLIRET + extrs arg0,28,29,ret1 + +/* DIVISION BY 16 (shift by 4) */ +GSYM($$divI_16) + .export $$divI_16,millicode + comclr,>= arg0,0,0 + addi 15,arg0,arg0 + MILLIRET + extrs arg0,27,28,ret1 + +/**************************************************************************** +* +* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these +* +* includes 3,5,15,17 and also 6,10,12 +* +****************************************************************************/ + +/* DIVISION BY 3 (use z = 2**32; a = 55555555) */ + +GSYM($$divI_3) + .export $$divI_3,millicode + comb,<,N x2,0,LREF(neg3) + + addi 1,x2,x2 /* this cannot overflow */ + extru x2,1,2,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,0,x1 + +LSYM(neg3) + subi 1,x2,x2 /* this cannot overflow */ + extru x2,1,2,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_3) + .export $$divU_3,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,30,t1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,t1,x1 + +/* DIVISION BY 5 (use z = 2**32; a = 33333333) */ + +GSYM($$divI_5) + .export $$divI_5,millicode + comb,<,N x2,0,LREF(neg5) + + addi 3,x2,t1 /* this cannot overflow */ + sh1add x2,t1,x2 /* multiply by 3 to get started */ + b LREF(pos) + addc 0,0,x1 + +LSYM(neg5) + sub 0,x2,x2 /* negate x2 */ + addi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,31,x1 /* get top bit (can be 1) */ + sh1add x2,x2,x2 /* multiply by 3 to get started */ + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_5) + .export $$divU_5,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,31,t1 /* multiply by 3 to get started */ + sh1add x2,x2,x2 + b LREF(pos) + addc t1,x1,x1 + +/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */ +GSYM($$divI_6) + .export $$divI_6,millicode + comb,<,N x2,0,LREF(neg6) + extru x2,30,31,x2 /* divide by 2 */ + addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */ + sh2add x2,t1,x2 /* multiply by 5 to get started */ + b LREF(pos) + addc 0,0,x1 + +LSYM(neg6) + subi 2,x2,x2 /* negate, divide by 2, and add 1 */ + /* 
negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,30,31,x2 + shd 0,x2,30,x1 + sh2add x2,x2,x2 /* multiply by 5 to get started */ + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_6) + .export $$divU_6,millicode + extru x2,30,31,x2 /* divide by 2 */ + addi 1,x2,x2 /* cannot carry */ + shd 0,x2,30,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,0,x1 + +/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */ +GSYM($$divU_10) + .export $$divU_10,millicode + extru x2,30,31,x2 /* divide by 2 */ + addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */ + sh1add x2,t1,x2 /* multiply by 3 to get started */ + addc 0,0,x1 +LSYM(pos) + shd x1,x2,28,t1 /* multiply by 0x11 */ + shd x2,0,28,t2 + add x2,t2,x2 + addc x1,t1,x1 +LSYM(pos_for_17) + shd x1,x2,24,t1 /* multiply by 0x101 */ + shd x2,0,24,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,16,t1 /* multiply by 0x10001 */ + shd x2,0,16,t2 + add x2,t2,x2 + MILLIRET + addc x1,t1,x1 + +GSYM($$divI_10) + .export $$divI_10,millicode + comb,< x2,0,LREF(neg10) + copy 0,x1 + extru x2,30,31,x2 /* divide by 2 */ + addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */ + sh1add x2,x2,x2 /* multiply by 3 to get started */ + +LSYM(neg10) + subi 2,x2,x2 /* negate, divide by 2, and add 1 */ + /* negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,30,31,x2 + sh1add x2,x2,x2 /* multiply by 3 to get started */ +LSYM(neg) + shd x1,x2,28,t1 /* multiply by 0x11 */ + shd x2,0,28,t2 + add x2,t2,x2 + addc x1,t1,x1 +LSYM(neg_for_17) + shd x1,x2,24,t1 /* multiply by 0x101 */ + shd x2,0,24,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,16,t1 /* multiply by 0x10001 */ + shd x2,0,16,t2 + add x2,t2,x2 + addc x1,t1,x1 + MILLIRET + sub 0,x1,x1 + +/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */ +GSYM($$divI_12) + .export $$divI_12,millicode + comb,< x2,0,LREF(neg12) + copy 0,x1 + extru x2,29,30,x2 /* divide by 4 */ + addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */ + sh2add x2,x2,x2 /* multiply by 5 to get started */ + +LSYM(neg12) + subi 4,x2,x2 /* negate, divide by 4, and add 1 */ + /* negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,29,30,x2 + b LREF(neg) + sh2add x2,x2,x2 /* multiply by 5 to get started */ + +GSYM($$divU_12) + .export $$divU_12,millicode + extru x2,29,30,x2 /* divide by 4 */ + addi 5,x2,t1 /* cannot carry */ + sh2add x2,t1,x2 /* multiply by 5 to get started */ + b LREF(pos) + addc 0,0,x1 + +/* DIVISION BY 15 (use z = 2**32; a = 11111111) */ +GSYM($$divI_15) + .export $$divI_15,millicode + comb,< x2,0,LREF(neg15) + copy 0,x1 + addib,tr 1,x2,LREF(pos)+4 + shd x1,x2,28,t1 + +LSYM(neg15) + b LREF(neg) + subi 1,x2,x2 + +GSYM($$divU_15) + .export $$divU_15,millicode + addi 1,x2,x2 /* this CAN overflow */ + b LREF(pos) + addc 0,0,x1 + +/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */ +GSYM($$divI_17) + .export $$divI_17,millicode + comb,<,n x2,0,LREF(neg17) + addi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,28,t1 /* multiply by 0xf to get started */ + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(pos_for_17) + subb t1,0,x1 + +LSYM(neg17) + subi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,28,t1 /* multiply by 0xf to get started */ + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(neg_for_17) + subb t1,0,x1 + +GSYM($$divU_17) + .export $$divU_17,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,28,t1 /* multiply by 0xf to get started */ +LSYM(u17) + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(pos_for_17) + subb t1,x1,x1 + + +/* 
DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these + includes 7,9 and also 14 + + + z = 2**24-1 + r = z mod x = 0 + + so choose b = 0 + + Also, in order to divide by z = 2**24-1, we approximate by dividing + by (z+1) = 2**24 (which is easy), and then correcting. + + (ax) = (z+1)q' + r + . = zq' + (q'+r) + + So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1) + Then the true remainder of (ax)/z is (q'+r). Repeat the process + with this new remainder, adding the tentative quotients together, + until a tentative quotient is 0 (and then we are done). There is + one last correction to be done. It is possible that (q'+r) = z. + If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But, + in fact, we need to add 1 more to the quotient. Now, it turns + out that this happens if and only if the original value x is + an exact multiple of y. So, to avoid a three instruction test at + the end, instead use 1 instruction to add 1 to x at the beginning. */ + +/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */ +GSYM($$divI_7) + .export $$divI_7,millicode + comb,<,n x2,0,LREF(neg7) +LSYM(7) + addi 1,x2,x2 /* cannot overflow */ + shd 0,x2,29,x1 + sh3add x2,x2,x2 + addc x1,0,x1 +LSYM(pos7) + shd x1,x2,26,t1 + shd x2,0,26,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,20,t1 + shd x2,0,20,t2 + add x2,t2,x2 + addc x1,t1,t1 + + /* computed <t1,x2>. Now divide it by (2**24 - 1) */ + + copy 0,x1 + shd,= t1,x2,24,t1 /* tentative quotient */ +LSYM(1) + addb,tr t1,x1,LREF(2) /* add to previous quotient */ + extru x2,31,24,x2 /* new remainder (unadjusted) */ + + MILLIRETN + +LSYM(2) + addb,tr t1,x2,LREF(1) /* adjust remainder */ + extru,= x2,7,8,t1 /* new quotient */ + +LSYM(neg7) + subi 1,x2,x2 /* negate x2 and add 1 */ +LSYM(8) + shd 0,x2,29,x1 + sh3add x2,x2,x2 + addc x1,0,x1 + +LSYM(neg7_shift) + shd x1,x2,26,t1 + shd x2,0,26,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,20,t1 + shd x2,0,20,t2 + add x2,t2,x2 + addc x1,t1,t1 + + /* computed <t1,x2>. 
Now divide it by (2**24 - 1) */ + + copy 0,x1 + shd,= t1,x2,24,t1 /* tentative quotient */ +LSYM(3) + addb,tr t1,x1,LREF(4) /* add to previous quotient */ + extru x2,31,24,x2 /* new remainder (unadjusted) */ + + MILLIRET + sub 0,x1,x1 /* negate result */ + +LSYM(4) + addb,tr t1,x2,LREF(3) /* adjust remainder */ + extru,= x2,7,8,t1 /* new quotient */ + +GSYM($$divU_7) + .export $$divU_7,millicode + addi 1,x2,x2 /* can carry */ + addc 0,0,x1 + shd x1,x2,29,t1 + sh3add x2,x2,x2 + b LREF(pos7) + addc t1,x1,x1 + +/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */ +GSYM($$divI_9) + .export $$divI_9,millicode + comb,<,n x2,0,LREF(neg9) + addi 1,x2,x2 /* cannot overflow */ + shd 0,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(pos7) + subb t1,0,x1 + +LSYM(neg9) + subi 1,x2,x2 /* negate and add 1 */ + shd 0,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(neg7_shift) + subb t1,0,x1 + +GSYM($$divU_9) + .export $$divU_9,millicode + addi 1,x2,x2 /* can carry */ + addc 0,0,x1 + shd x1,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(pos7) + subb t1,x1,x1 + +/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */ +GSYM($$divI_14) + .export $$divI_14,millicode + comb,<,n x2,0,LREF(neg14) +GSYM($$divU_14) + .export $$divU_14,millicode + b LREF(7) /* go to 7 case */ + extru x2,30,31,x2 /* divide by 2 */ + +LSYM(neg14) + subi 2,x2,x2 /* negate (and add 2) */ + b LREF(8) + extru x2,30,31,x2 /* divide by 2 */ + .exit + .procend + .end +#endif + +#ifdef L_mulI +/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */ +/****************************************************************************** +This routine is used on PA2.0 processors when gcc -mno-fpregs is used + +ROUTINE: $$mulI + + +DESCRIPTION: + + $$mulI multiplies two single word integers, giving a single + word result. 
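+
+   Roughly speaking, the single word result is the low-order 32 bits of
+   the product, i.e. what the C expression (unsigned) arg0 * (unsigned) arg1
+   would yield.
+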
+ + +INPUT REGISTERS: + + arg0 = Operand 1 + arg1 = Operand 2 + r31 == return pc + sr0 == return space when called externally + + +OUTPUT REGISTERS: + + arg0 = undefined + arg1 = undefined + ret1 = result + +OTHER REGISTERS AFFECTED: + + r1 = undefined + +SIDE EFFECTS: + + Causes a trap under the following conditions: NONE + Changes memory at the following places: NONE + +PERMISSIBLE CONTEXT: + + Unwindable + Does not create a stack frame + Is usable for internal or external microcode + +DISCUSSION: + + Calls other millicode routines via mrp: NONE + Calls other millicode routines: NONE + +***************************************************************************/ + + +#define a0 %arg0 +#define a1 %arg1 +#define t0 %r1 +#define r %ret1 + +#define a0__128a0 zdep a0,24,25,a0 +#define a0__256a0 zdep a0,23,24,a0 +#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) +#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) +#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) +#define b_n_ret_t0 b,n LREF(ret_t0) +#define b_e_shift b LREF(e_shift) +#define b_e_t0ma0 b LREF(e_t0ma0) +#define b_e_t0 b LREF(e_t0) +#define b_e_t0a0 b LREF(e_t0a0) +#define b_e_t02a0 b LREF(e_t02a0) +#define b_e_t04a0 b LREF(e_t04a0) +#define b_e_2t0 b LREF(e_2t0) +#define b_e_2t0a0 b LREF(e_2t0a0) +#define b_e_2t04a0 b LREF(e2t04a0) +#define b_e_3t0 b LREF(e_3t0) +#define b_e_4t0 b LREF(e_4t0) +#define b_e_4t0a0 b LREF(e_4t0a0) +#define b_e_4t08a0 b LREF(e4t08a0) +#define b_e_5t0 b LREF(e_5t0) +#define b_e_8t0 b LREF(e_8t0) +#define b_e_8t0a0 b LREF(e_8t0a0) +#define r__r_a0 add r,a0,r +#define r__r_2a0 sh1add a0,r,r +#define r__r_4a0 sh2add a0,r,r +#define r__r_8a0 sh3add a0,r,r +#define r__r_t0 add r,t0,r +#define r__r_2t0 sh1add t0,r,r +#define r__r_4t0 sh2add t0,r,r +#define r__r_8t0 sh3add t0,r,r +#define t0__3a0 sh1add a0,a0,t0 +#define t0__4a0 sh2add a0,0,t0 +#define t0__5a0 sh2add a0,a0,t0 +#define t0__8a0 sh3add a0,0,t0 +#define t0__9a0 sh3add a0,a0,t0 +#define t0__16a0 zdep a0,27,28,t0 +#define t0__32a0 zdep a0,26,27,t0 +#define t0__64a0 zdep a0,25,26,t0 +#define t0__128a0 zdep a0,24,25,t0 +#define t0__t0ma0 sub t0,a0,t0 +#define t0__t0_a0 add t0,a0,t0 +#define t0__t0_2a0 sh1add a0,t0,t0 +#define t0__t0_4a0 sh2add a0,t0,t0 +#define t0__t0_8a0 sh3add a0,t0,t0 +#define t0__2t0_a0 sh1add t0,a0,t0 +#define t0__3t0 sh1add t0,t0,t0 +#define t0__4t0 sh2add t0,0,t0 +#define t0__4t0_a0 sh2add t0,a0,t0 +#define t0__5t0 sh2add t0,t0,t0 +#define t0__8t0 sh3add t0,0,t0 +#define t0__8t0_a0 sh3add t0,a0,t0 +#define t0__9t0 sh3add t0,t0,t0 +#define t0__16t0 zdep t0,27,28,t0 +#define t0__32t0 zdep t0,26,27,t0 +#define t0__256a0 zdep a0,23,24,t0 + + + SUBSPA_MILLI + ATTR_MILLI + .align 16 + .proc + .callinfo millicode + .export $$mulI,millicode +GSYM($$mulI) + combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ + copy 0,r /* zero out the result */ + xor a0,a1,a0 /* swap a0 & a1 using the */ + xor a0,a1,a1 /* old xor trick */ + xor a0,a1,a0 +LSYM(l4) + combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ + zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ + sub,> 0,a1,t0 /* otherwise negate both and */ + combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ + sub 0,a0,a1 + movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ + +LSYM(l0) r__r_t0 /* add in this partial product */ +LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ +LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ +LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ + extru a1,23,24,a1 /* a1 >>= 8 ****************** */ + +/*16 insts before this. 
*/ +/* a0 <<= 8 ************************** */ +LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop +LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop +LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop +LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 +LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop +LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 +LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 +LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop +LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 +LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 +LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 +LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 +LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 +LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 +LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 +LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 +LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 +LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0 +LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 +LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 +LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0 +LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0 +LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0 +LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 +LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0 +LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 +LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! 
t0__t0ma0 ! b_n_ret_t0 +LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 +LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0 +LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0 +LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0 +LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0 +LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 +LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0 +LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0 +LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 +LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 +LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0 +LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0 +LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0 +LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0 +LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0 +LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 +LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 +LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x129) t0__128a0 ! 
a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0 +LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0 +LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0 +LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0 +LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0 +LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0 +LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0 +LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0 +LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0 +LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0 +LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0 +LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0 +LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0 +LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0 +LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0 +LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0 +LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0 +LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 +LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0 +LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0 +LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0 +LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0 +LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0 +LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x194) t0__8a0 ! t0__3t0 ! 
b_e_2t0 ! t0__4t0_a0 +LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 +LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0 +LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0 +LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 +LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 +LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0 +LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0 +LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0 +LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0 +LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0 +LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 +LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0 +LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0 +LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 +LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0 +LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0 +LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 +LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0 +LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0 +LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0 +LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0 +LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0 +LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0 +LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0 +LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0 +LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0 +LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0 +LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0 +LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0 +LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0 +LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0 +LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0 +LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 +LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +/*1040 insts before this. 
*/ +LSYM(ret_t0) MILLIRET +LSYM(e_t0) r__r_t0 +LSYM(e_shift) a1_ne_0_b_l2 + a0__256a0 /* a0 <<= 8 *********** */ + MILLIRETN +LSYM(e_t0ma0) a1_ne_0_b_l0 + t0__t0ma0 + MILLIRET + r__r_t0 +LSYM(e_t0a0) a1_ne_0_b_l0 + t0__t0_a0 + MILLIRET + r__r_t0 +LSYM(e_t02a0) a1_ne_0_b_l0 + t0__t0_2a0 + MILLIRET + r__r_t0 +LSYM(e_t04a0) a1_ne_0_b_l0 + t0__t0_4a0 + MILLIRET + r__r_t0 +LSYM(e_2t0) a1_ne_0_b_l1 + r__r_2t0 + MILLIRETN +LSYM(e_2t0a0) a1_ne_0_b_l0 + t0__2t0_a0 + MILLIRET + r__r_t0 +LSYM(e2t04a0) t0__t0_2a0 + a1_ne_0_b_l1 + r__r_2t0 + MILLIRETN +LSYM(e_3t0) a1_ne_0_b_l0 + t0__3t0 + MILLIRET + r__r_t0 +LSYM(e_4t0) a1_ne_0_b_l1 + r__r_4t0 + MILLIRETN +LSYM(e_4t0a0) a1_ne_0_b_l0 + t0__4t0_a0 + MILLIRET + r__r_t0 +LSYM(e4t08a0) t0__t0_2a0 + a1_ne_0_b_l1 + r__r_4t0 + MILLIRETN +LSYM(e_5t0) a1_ne_0_b_l0 + t0__5t0 + MILLIRET + r__r_t0 +LSYM(e_8t0) a1_ne_0_b_l1 + r__r_8t0 + MILLIRETN +LSYM(e_8t0a0) a1_ne_0_b_l0 + t0__8t0_a0 + MILLIRET + r__r_t0 + + .procend + .end +#endif diff --git a/gcc/config/pa/pa-64.h b/gcc/config/pa/pa-64.h new file mode 100644 index 000000000..67c8179c5 --- /dev/null +++ b/gcc/config/pa/pa-64.h @@ -0,0 +1,100 @@ +/* Definitions of target machine for GNU compiler, for HPs using the + 64bit runtime model. + Copyright (C) 1999, 2000, 2003, 2004, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* The default sizes for basic datatypes provided by GCC are not + correct for the PA64 runtime architecture. + + In PA64, basic types have the following sizes + + char 1 byte + short 2 bytes + int 4 bytes + long 8 bytes + long long 8 bytes + pointer 8 bytes + float 4 bytes + double 8 bytes + long double 16 bytes + size_t 8 bytes + ptrdiff_t 8 bytes + wchar 4 bytes + + Make GCC agree with types.h. */ +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "unsigned int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* If it is not listed here, then the default selected by GCC is OK. */ +#undef SHORT_TYPE_SIZE +#define SHORT_TYPE_SIZE 16 +#undef INT_TYPE_SIZE +#define INT_TYPE_SIZE 32 +#undef LONG_TYPE_SIZE +#define LONG_TYPE_SIZE 64 +#undef LONG_LONG_TYPE_SIZE +#define LONG_LONG_TYPE_SIZE 64 +#undef FLOAT_TYPE_SIZE +#define FLOAT_TYPE_SIZE 32 +#undef DOUBLE_TYPE_SIZE +#define DOUBLE_TYPE_SIZE 64 +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 128 + +/* Temporary until we figure out what to do with those *(&@$ 32bit + relocs which appear in stabs. */ +#undef DBX_DEBUGGING_INFO + +/* ?!? This needs to be made compile-time selectable. + + The PA64 runtime model has arguments that grow to higher addresses + (like most other targets). The older runtime model has arguments + that grow to lower addresses. What fun. 
*/ +#undef ARGS_GROW_DOWNWARD + +/* If defined, a C expression which determines whether the default + implementation of va_arg will attempt to pad down before reading the + next argument, if that argument is smaller than its aligned space as + controlled by PARM_BOUNDARY. If this macro is not defined, all such + arguments are padded down when BYTES_BIG_ENDIAN is true. We don't + want aggregates padded down. */ + +#define PAD_VARARGS_DOWN (!AGGREGATE_TYPE_P (type)) + +/* In the PA architecture, it is not possible to directly move data + between GENERAL_REGS and FP_REGS. On the 32-bit port, we use the + location at SP-16 because PA 1.X only supports 5-bit immediates for + floating-point loads and stores. We don't expose this location in + the RTL to avoid scheduling related problems. For example, the + store and load could be separated by a call to a pure or const + function which has no frame and this function might also use SP-16. + We have 14-bit immediates on the 64-bit port, so we use secondary + memory for the copies. */ +#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ + (MAYBE_FP_REG_CLASS_P (CLASS1) != FP_REG_CLASS_P (CLASS2) \ + || MAYBE_FP_REG_CLASS_P (CLASS2) != FP_REG_CLASS_P (CLASS1)) + diff --git a/gcc/config/pa/pa-hpux.h b/gcc/config/pa/pa-hpux.h new file mode 100644 index 000000000..f167e133e --- /dev/null +++ b/gcc/config/pa/pa-hpux.h @@ -0,0 +1,119 @@ +/* Definitions of target machine for GNU compiler, for HP-UX. + Copyright (C) 1991, 1995, 1996, 2002, 2003, 2004, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* HP-UX UNIX features. */ +#undef TARGET_HPUX +#define TARGET_HPUX 1 + +#undef HPUX_LONG_DOUBLE_LIBRARY +#define HPUX_LONG_DOUBLE_LIBRARY 1 + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT MASK_BIG_SWITCH + +/* Make GCC agree with types.h. */ +#undef SIZE_TYPE +#undef PTRDIFF_TYPE + +#define SIZE_TYPE "unsigned int" +#define PTRDIFF_TYPE "int" + +#define LONG_DOUBLE_TYPE_SIZE 128 +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) ((MODE) == TFmode) + +/* GCC always defines __STDC__. HP C++ compilers don't define it. This + causes trouble when sys/stdsyms.h is included. As a work around, + we define __STDC_EXT__. A similar situation exists with respect to + the definition of __cplusplus. We define _INCLUDE_LONGLONG + to prevent nlist.h from defining __STDC_32_MODE__ (no longlong + support). 
*/ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("system=hpux"); \ + builtin_assert ("system=unix"); \ + builtin_define ("__hp9000s800"); \ + builtin_define ("__hp9000s800__"); \ + builtin_define ("__hp9k8"); \ + builtin_define ("__hp9k8__"); \ + builtin_define ("__hpux"); \ + builtin_define ("__hpux__"); \ + builtin_define ("__unix"); \ + builtin_define ("__unix__"); \ + builtin_define ("__STDC_EXT__"); \ + if (c_dialect_cxx ()) \ + { \ + builtin_define ("_HPUX_SOURCE"); \ + builtin_define ("_INCLUDE_LONGLONG"); \ + } \ + else if (!flag_iso) \ + { \ + builtin_define ("_HPUX_SOURCE"); \ + if (preprocessing_trad_p ()) \ + { \ + builtin_define ("hp9000s800"); \ + builtin_define ("hp9k8"); \ + builtin_define ("hppa"); \ + builtin_define ("hpux"); \ + builtin_define ("unix"); \ + builtin_define ("__CLASSIC_C__"); \ + builtin_define ("_PWB"); \ + builtin_define ("PWB"); \ + } \ + } \ + if (TARGET_SIO) \ + builtin_define ("_SIO"); \ + else \ + { \ + builtin_define ("__hp9000s700"); \ + builtin_define ("__hp9000s700__"); \ + builtin_define ("_WSIO"); \ + } \ + } \ + while (0) + +/* Like the default, except no -lg. */ +#undef LIB_SPEC +#define LIB_SPEC "%{!shared:%{!p:%{!pg:-lc}}%{p: -L/lib/libp/ -lc}%{pg: -L/lib/libp/ -lc}}" + +#undef LINK_SPEC +#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_PA_11) +#define LINK_SPEC \ + "%{!mpa-risc-1-0:%{!march=1.0:%{static:-L/lib/pa1.1 -L/usr/lib/pa1.1 }}}%{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{g*:-a archive} %{shared:-b}" +#else +#define LINK_SPEC \ + "%{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{g*:-a archive} %{shared:-b}" +#endif + +/* hpux8 and later have C++ compatible include files, so do not + pretend they are `extern "C"'. */ +#define NO_IMPLICIT_EXTERN_C + +/* hpux11 and earlier don't have fputc_unlocked, so we must inhibit the + transformation of fputs_unlocked and fprintf_unlocked to fputc_unlocked. */ +#define DONT_HAVE_FPUTC_UNLOCKED + +/* We want the entry value of SP saved in the frame marker for + compatibility with the HP-UX unwind library. */ +#undef TARGET_HPUX_UNWIND_LIBRARY +#define TARGET_HPUX_UNWIND_LIBRARY 1 + +#define MD_UNWIND_SUPPORT "config/pa/hpux-unwind.h" diff --git a/gcc/config/pa/pa-hpux.opt b/gcc/config/pa/pa-hpux.opt new file mode 100644 index 000000000..eaed8be2d --- /dev/null +++ b/gcc/config/pa/pa-hpux.opt @@ -0,0 +1,37 @@ +; Options for the HP PA-RISC port of the compiler. + +; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. 
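For context on the msio/mwsio entries just below: they set and clear the SIO mask that TARGET_OS_CPP_BUILTINS in pa-hpux.h (shown above) tests when deciding between the _SIO and _WSIO/__hp9000s700 predefines. The following is a minimal, purely illustrative sketch of how user code could observe that choice; only the macro names _SIO, _WSIO and __hp9000s700 come from the header, and the helper function itself is hypothetical, not part of the GCC sources.

/* Illustrative only: report which I/O-model predefines the driver emitted. */
static const char *
hpux_io_model (void)
{
#if defined(_SIO)
  return "server I/O (-msio)";            /* the __hp9000s700 macros are not defined */
#elif defined(_WSIO) && defined(__hp9000s700)
  return "workstation I/O (-mwsio, the default)";
#else
  return "unknown";
#endif
}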
+ +msio +Target RejectNegative Mask(SIO) MaskExists +Generate cpp defines for server IO + +munix=93 +Target RejectNegative +Specify UNIX standard for predefines and linking + +mwsio +Target RejectNegative InverseMask(SIO) +Generate cpp defines for workstation IO + +nolibdld +Driver + +rdynamic +Driver diff --git a/gcc/config/pa/pa-hpux10.h b/gcc/config/pa/pa-hpux10.h new file mode 100644 index 000000000..bfe09f247 --- /dev/null +++ b/gcc/config/pa/pa-hpux10.h @@ -0,0 +1,144 @@ +/* Definitions of target machine for GNU compiler, for HP PA-RISC + Copyright (C) 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2004, + 2007, 2008, 2010 Free Software Foundation, Inc. + Contributed by Tim Moore (moore@defmacro.cs.utah.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* GCC always defines __STDC__. HP C++ compilers don't define it. This + causes trouble when sys/stdsyms.h is included. As a work around, + we define __STDC_EXT__. A similar situation exists with respect to + the definition of __cplusplus. We define _INCLUDE_LONGLONG + to prevent nlist.h from defining __STDC_32_MODE__ (no longlong + support). We define __STDCPP__ to get certain system headers + (notably assert.h) to assume standard preprocessor behavior in C++. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("system=hpux"); \ + builtin_assert ("system=unix"); \ + builtin_define ("__hp9000s800"); \ + builtin_define ("__hp9000s800__"); \ + builtin_define ("__hpux"); \ + builtin_define ("__hpux__"); \ + builtin_define ("__unix"); \ + builtin_define ("__unix__"); \ + builtin_define ("__STDC_EXT__"); \ + if (c_dialect_cxx ()) \ + { \ + builtin_define ("_HPUX_SOURCE"); \ + builtin_define ("_REENTRANT"); \ + builtin_define ("_INCLUDE_LONGLONG"); \ + builtin_define ("__STDCPP__"); \ + } \ + else if (!flag_iso) \ + { \ + builtin_define ("_HPUX_SOURCE"); \ + builtin_define ("_REENTRANT"); \ + if (preprocessing_trad_p ()) \ + { \ + builtin_define ("hp9000s800"); \ + builtin_define ("hppa"); \ + builtin_define ("hpux"); \ + builtin_define ("unix"); \ + builtin_define ("__CLASSIC_C__"); \ + builtin_define ("_PWB"); \ + builtin_define ("PWB"); \ + } \ + } \ + if (flag_pa_unix >= 1995) \ + { \ + builtin_define ("_XOPEN_UNIX"); \ + builtin_define ("_XOPEN_SOURCE_EXTENDED"); \ + } \ + if (TARGET_SIO) \ + builtin_define ("_SIO"); \ + else \ + { \ + builtin_define ("__hp9000s700"); \ + builtin_define ("__hp9000s700__"); \ + builtin_define ("_WSIO"); \ + } \ + } \ + while (0) + +#define CPP_SPEC "%{threads: -D_REENTRANT -D_DCE_THREADS}" + +/* We can debug dynamically linked executables on hpux9; we also want + dereferencing of a NULL pointer to cause a SEGV. 
*/ +#undef LINK_SPEC +#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_PA_11) +#define LINK_SPEC \ + "%{!mpa-risc-1-0:%{!march=1.0:%{static:-L/lib/pa1.1 -L/usr/lib/pa1.1 }}}\ + %{!shared:%{p:-L/lib/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{pg:-L/lib/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{!static:%{rdynamic:-E}}}\ + -z %{mlinker-opt:-O} %{!shared:-u main}\ + %{static:-a archive} %{shared:-b}" +#else +#define LINK_SPEC \ + "%{!shared:%{p:-L/lib/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{pg:-L/lib/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{!static:%{rdynamic:-E}}}\ + -z %{mlinker-opt:-O} %{!shared:-u main}\ + %{static:-a archive} %{shared:-b}" +#endif + +/* Like the default, except no -lg. */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{!shared:\ + %{!p:%{!pg:\ + %{!threads:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}\ + %{threads:-lcma -lc}}}\ + %{p:%{!pg:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\ + %{pg:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}" + +#undef THREAD_MODEL_SPEC +#define THREAD_MODEL_SPEC "%{!threads:single}%{threads:dce}" + +/* Under hpux10, the normal location of the `ld' and `as' programs is the + /usr/ccs/bin directory. */ + +#ifndef CROSS_DIRECTORY_STRUCTURE +#undef MD_EXEC_PREFIX +#define MD_EXEC_PREFIX "/usr/ccs/bin/" +#endif + +/* Under hpux10, the normal location of the various *crt*.o files is + the /usr/ccs/lib directory. However, the profiling files are in + /opt/langtools/lib. */ + +#ifndef CROSS_DIRECTORY_STRUCTURE +#undef MD_STARTFILE_PREFIX +#define MD_STARTFILE_PREFIX "/usr/ccs/lib/" +#define MD_STARTFILE_PREFIX_1 "/opt/langtools/lib/" +#endif + +/* hpux10 has the new HP assembler. It's still lousy, but it's a whole lot + better than the assembler shipped with older versions of hpux. */ +#undef NEW_HP_ASSEMBLER +#define NEW_HP_ASSEMBLER 1 diff --git a/gcc/config/pa/pa-hpux10.opt b/gcc/config/pa/pa-hpux10.opt new file mode 100644 index 000000000..59056deba --- /dev/null +++ b/gcc/config/pa/pa-hpux10.opt @@ -0,0 +1,22 @@ +; Options specific to HP-UX 10. + +; Copyright (C) 2011 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. 
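The threads entry just below registers -threads as a driver-level switch; the specs in pa-hpux10.h above (CPP_SPEC, LIB_SPEC, THREAD_MODEL_SPEC) then react to it through %{threads:...} and %{!threads:...} substitutions. As a rough model of that conditional, assuming nothing beyond the spec strings quoted above (this is not the driver's real implementation):

/* Sketch only: "%{!threads:single}%{threads:dce}" in THREAD_MODEL_SPEC
   yields "dce" when -threads was given and "single" otherwise, just as
   "%{threads:-lcma -lc}" in LIB_SPEC adds -lcma only in the former case. */
static const char *
thread_model (int threads_given)
{
  return threads_given ? "dce" : "single";
}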
+ +threads +Driver diff --git a/gcc/config/pa/pa-hpux1010.h b/gcc/config/pa/pa-hpux1010.h new file mode 100644 index 000000000..dfda771fa --- /dev/null +++ b/gcc/config/pa/pa-hpux1010.h @@ -0,0 +1,27 @@ +/* Definitions of target machine for GNU compiler, for HP PA-RISC + Copyright (C) 2004, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* HP-UX 10.10 UNIX 95 features. */ +#undef TARGET_HPUX_10_10 +#define TARGET_HPUX_10_10 1 + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \ + %{!munix=93:unix95%O%s}}" diff --git a/gcc/config/pa/pa-hpux1010.opt b/gcc/config/pa/pa-hpux1010.opt new file mode 100644 index 000000000..f409e8404 --- /dev/null +++ b/gcc/config/pa/pa-hpux1010.opt @@ -0,0 +1,23 @@ +; Options for the HP PA-RISC port of the compiler. + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +munix=95 +Target RejectNegative +Specify UNIX standard for predefines and linking diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h new file mode 100644 index 000000000..81dfdf3d1 --- /dev/null +++ b/gcc/config/pa/pa-hpux11.h @@ -0,0 +1,189 @@ +/* Definitions of target machine for GNU compiler, for HP PA-RISC + Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, 2005, 2007, 2008, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* GCC always defines __STDC__. HP C++ compilers don't define it. This + causes trouble when sys/stdsyms.h is included. As a work around, + we define __STDC_EXT__. A similar situation exists with respect to + the definition of __cplusplus. 
We define _INCLUDE_LONGLONG + to prevent nlist.h from defining __STDC_32_MODE__ (no longlong + support). We define __STDCPP__ to get certain system headers + (notably assert.h) to assume standard preprocessor behavior in C++. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("system=hpux"); \ + builtin_assert ("system=unix"); \ + builtin_define ("__hp9000s800"); \ + builtin_define ("__hp9000s800__"); \ + builtin_define ("__hpux"); \ + builtin_define ("__hpux__"); \ + builtin_define ("__unix"); \ + builtin_define ("__unix__"); \ + builtin_define ("__STDC_EXT__"); \ + if (c_dialect_cxx ()) \ + { \ + builtin_define ("_HPUX_SOURCE"); \ + builtin_define ("_INCLUDE_LONGLONG"); \ + builtin_define ("__STDCPP__"); \ + } \ + else \ + { \ + if (!flag_iso) \ + { \ + builtin_define ("_HPUX_SOURCE"); \ + if (preprocessing_trad_p ()) \ + { \ + builtin_define ("hp9000s800"); \ + builtin_define ("hppa"); \ + builtin_define ("hpux"); \ + builtin_define ("unix"); \ + builtin_define ("__CLASSIC_C__"); \ + builtin_define ("_PWB"); \ + builtin_define ("PWB"); \ + } \ + } \ + } \ + if (!TARGET_64BIT) \ + builtin_define ("_ILP32"); \ + if (flag_pa_unix >= 1995 && !flag_iso) \ + { \ + builtin_define ("_XOPEN_UNIX"); \ + builtin_define ("_XOPEN_SOURCE_EXTENDED"); \ + } \ + if (TARGET_HPUX_11_11) \ + { \ + if (flag_pa_unix >= 1998) \ + { \ + if (flag_isoc94 || flag_isoc99 || c_dialect_cxx() \ + || !flag_iso) \ + builtin_define ("_INCLUDE__STDC_A1_SOURCE"); \ + if (!flag_iso) \ + builtin_define ("_INCLUDE_XOPEN_SOURCE_500"); \ + } \ + else if (flag_isoc94 || flag_isoc99 || c_dialect_cxx ()) \ + warning (0, "-munix=98 option required for C89 " \ + "Amendment 1 features.\n"); \ + } \ + if (TARGET_SIO) \ + builtin_define ("_SIO"); \ + else \ + { \ + builtin_define ("__hp9000s700"); \ + builtin_define ("__hp9000s700__"); \ + builtin_define ("_WSIO"); \ + } \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC \ + "%{mt|pthread:-D_REENTRANT -D_THREAD_SAFE -D_POSIX_C_SOURCE=199506L}" +/* aCC defines also -DRWSTD_MULTI_THREAD, -DRW_MULTI_THREAD. These + affect only aCC's C++ library (Rogue Wave-derived) which we do not + use, and they violate the user's name space. */ + +/* We can debug dynamically linked executables on hpux11; we also + want dereferencing of a NULL pointer to cause a SEGV. */ +#undef LINK_SPEC +#define LINK_SPEC \ + "%{!shared:%{p:-L/lib/libp -L/usr/lib/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{pg:-L/lib/libp -L/usr/lib/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{!static:%{rdynamic:-E}}}\ + -z %{mlinker-opt:-O} %{!shared:-u main -u __gcc_plt_call}\ + %{static:-a archive} %{shared:-b}" + +/* HP-UX 11 has posix threads. HP's shared libc contains pthread stubs + so that non-threaded applications can be linked with a thread-safe + libc without a subsequent loss of performance. For more details, + see <http://docs.hp.com/en/1896/pthreads.html>. */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{!shared:\ + %{fopenmp:%{static:-a archive_shared} -lrt %{static:-a archive}}\ + %{mt|pthread:-lpthread} -lc\ + %{static:%{!nolibdld:-a archive_shared -ldld -a archive -lc}\ + %{!mt:%{!pthread:-a shared -lc -a archive}}}}\ + %{shared:%{mt|pthread:-lpthread}}" + +/* The libgcc_stub.a library needs to come last. 
*/ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%G %L %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}}}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \ + %{!munix=93:unix95%O%s}}" + +/* Under hpux11, the normal location of the `ld' and `as' programs is the + /usr/ccs/bin directory. */ + +#ifndef CROSS_DIRECTORY_STRUCTURE +#undef MD_EXEC_PREFIX +#define MD_EXEC_PREFIX "/usr/ccs/bin/" +#endif + +/* Under hpux11 the normal location of the various *crt*.o files is + the /usr/ccs/lib directory. However, the profiling files are in + /opt/langtools/lib. */ + +#ifndef CROSS_DIRECTORY_STRUCTURE +#undef MD_STARTFILE_PREFIX +#define MD_STARTFILE_PREFIX "/usr/ccs/lib/" +#define MD_STARTFILE_PREFIX_1 "/opt/langtools/lib/" +#endif + +/* hpux11 has the new HP assembler. It's still lousy, but it's a whole lot + better than the assembler shipped with older versions of hpux. */ +#undef NEW_HP_ASSEMBLER +#define NEW_HP_ASSEMBLER 1 + +/* Make GCC agree with types.h. */ +#undef SIZE_TYPE +#undef PTRDIFF_TYPE + +#define SIZE_TYPE "long unsigned int" +#define PTRDIFF_TYPE "long int" + +/* HP-UX 11.0 and above provides initialization and finalization function + support from linker command line. We don't need to invoke __main to run + constructors. We also don't need chatr to determine the dependencies of + dynamically linked executables and shared libraries. */ +#undef LDD_SUFFIX +#undef PARSE_LDD_OUTPUT +#undef HAS_INIT_SECTION +#define HAS_INIT_SECTION 1 +#undef LD_INIT_SWITCH +#define LD_INIT_SWITCH "+init" +#undef LD_FINI_SWITCH +#define LD_FINI_SWITCH "+fini" + +/* The HP-UX 11.X SOM linker (ld32) can successfully link shared libraries + with secondary definition (weak) symbols. */ +#undef TARGET_SOM_SDEF +#define TARGET_SOM_SDEF 1 + +#undef TARGET_HPUX_11 +#define TARGET_HPUX_11 1 diff --git a/gcc/config/pa/pa-hpux1111.h b/gcc/config/pa/pa-hpux1111.h new file mode 100644 index 000000000..e47d08c7e --- /dev/null +++ b/gcc/config/pa/pa-hpux1111.h @@ -0,0 +1,27 @@ +/* Definitions of target machine for GNU compiler, for HP PA-RISC + Copyright (C) 2004, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* HP-UX 11i multibyte and UNIX 98 extensions. */ +#undef TARGET_HPUX_11_11 +#define TARGET_HPUX_11_11 1 + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \ + %{munix=95:unix95%O%s}%{!munix=93:%{!munix=95:unix98%O%s}}}" diff --git a/gcc/config/pa/pa-hpux1111.opt b/gcc/config/pa/pa-hpux1111.opt new file mode 100644 index 000000000..b59f64a15 --- /dev/null +++ b/gcc/config/pa/pa-hpux1111.opt @@ -0,0 +1,23 @@ +; Options for the HP PA-RISC port of the compiler. + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. 
+; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +munix=98 +Target RejectNegative +Specify UNIX standard for predefines and linking diff --git a/gcc/config/pa/pa-linux.h b/gcc/config/pa/pa-linux.h new file mode 100644 index 000000000..64626e6fc --- /dev/null +++ b/gcc/config/pa/pa-linux.h @@ -0,0 +1,138 @@ +/* Definitions for PA_RISC with ELF format + Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010, + 2011 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + builtin_assert ("machine=bigendian"); \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" + +#undef ASM_SPEC +#define ASM_SPEC \ + "" + +/* Define this for shared library support because it isn't in the main + linux.h file. */ + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +#undef LINK_SPEC +#define LINK_SPEC "\ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER "} \ + %{static:-static}}" + +/* glibc's profiling functions don't need gcc to allocate counters. */ +#define NO_DEFERRED_PROFILE_COUNTERS 1 + +/* Define the strings used for the special svr4 .type and .size directives. + These strings generally do not vary from one system running svr4 to + another, but if a given system (e.g. m88k running svr) needs to use + different pseudo-op names for these, they may be overridden in the + file which includes this one. */ + +#undef STRING_ASM_OP +#define STRING_ASM_OP "\t.stringz\t" + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + +#define TARGET_ASM_FILE_START pa_linux_file_start + +/* We want local labels to start with period if made with asm_fprintf. */ +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* Define these to generate the Linux/ELF/SysV style of internal + labels all the time - i.e. to be compatible with + ASM_GENERATE_INTERNAL_LABEL in <elfos.h>. Compare these with the + ones in pa.h and note the lack of dollar signs in these. FIXME: + shouldn't we fix pa.h to use ASM_GENERATE_INTERNAL_LABEL instead? 
*/ + +#undef ASM_OUTPUT_ADDR_VEC_ELT +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + if (TARGET_BIG_SWITCH) \ + fprintf (FILE, "\t.word .L%d\n", VALUE); \ + else \ + fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE) + +#undef ASM_OUTPUT_ADDR_DIFF_ELT +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + if (TARGET_BIG_SWITCH) \ + fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL); \ + else \ + fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE) + +/* Use the default. */ +#undef ASM_OUTPUT_LABEL + +/* NOTE: (*targetm.asm_out.internal_label)() is defined for us by elfos.h, and + does what we want (i.e. uses colons). It must be compatible with + ASM_GENERATE_INTERNAL_LABEL(), so do not define it here. */ + +/* Use the default. */ +#undef ASM_OUTPUT_INTERNAL_LABEL + +/* Use the default. */ +#undef TARGET_ASM_GLOBALIZE_LABEL +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP ".globl " + +/* FIXME: Hacked from the <elfos.h> one so that we avoid multiple + labels in a function declaration (since pa.c seems determined to do + it differently) */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + } \ + while (0) + +/* As well as globalizing the label, we need to encode the label + to ensure a plabel is generated in an indirect call. */ + +#undef ASM_OUTPUT_EXTERNAL_LIBCALL +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ + do \ + { \ + if (!FUNCTION_NAME_P (XSTR (FUN, 0))) \ + hppa_encode_label (FUN); \ + (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)); \ + } \ + while (0) + +/* Linux always uses gas. */ +#undef TARGET_GAS +#define TARGET_GAS 1 diff --git a/gcc/config/pa/pa-modes.def b/gcc/config/pa/pa-modes.def new file mode 100644 index 000000000..6a2368c7a --- /dev/null +++ b/gcc/config/pa/pa-modes.def @@ -0,0 +1,32 @@ +/* Definitions of target machine for GNU compiler, for the HP Spectrum. + Copyright (C) 2002, 2003, 2006, 2007 Free Software Foundation, Inc. + Contributed by Michael Tiemann (tiemann@cygnus.com) of Cygnus Support + and Tim Moore (moore@defmacro.cs.utah.edu) of the Center for + Software Science at the University of Utah. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* PA-RISC has the same reversed quiet bit as MIPS. + ??? Why is this called the MIPS format. */ +RESET_FLOAT_FORMAT (SF, mips_single_format); +RESET_FLOAT_FORMAT (DF, mips_double_format); + +/* TFmode: IEEE quad floating point (software). */ +FLOAT_MODE (TF, 16, mips_quad_format); + +/* HPPA floating comparisons produce distinct condition codes. */ +CC_MODE (CCFP); diff --git a/gcc/config/pa/pa-protos.h b/gcc/config/pa/pa-protos.h new file mode 100644 index 000000000..53dcda009 --- /dev/null +++ b/gcc/config/pa/pa-protos.h @@ -0,0 +1,172 @@ +/* Prototypes for pa.c functions used in the md file & elsewhere. 
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2010 + Free Software Foundation, + Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifdef RTX_CODE +/* Prototype function used in various macros. */ +extern int symbolic_operand (rtx, enum machine_mode); +extern int tls_symbolic_operand (rtx); +extern rtx pa_eh_return_handler_rtx (void); + +/* Used in insn-*.c. */ +extern int following_call (rtx); +extern int function_label_operand (rtx, enum machine_mode); +extern int lhs_lshift_cint_operand (rtx, enum machine_mode); + +/* Define functions in pa.c and used in insn-output.c. */ + +extern const char *output_and (rtx *); +extern const char *output_ior (rtx *); +extern const char *output_move_double (rtx *); +extern const char *output_fp_move_double (rtx *); +extern const char *output_block_move (rtx *, int); +extern const char *output_block_clear (rtx *, int); +extern const char *output_cbranch (rtx *, int, rtx); +extern const char *output_lbranch (rtx, rtx, int); +extern const char *output_bb (rtx *, int, rtx, int); +extern const char *output_bvb (rtx *, int, rtx, int); +extern const char *output_dbra (rtx *, rtx, int); +extern const char *output_movb (rtx *, rtx, int, int); +extern const char *output_parallel_movb (rtx *, rtx); +extern const char *output_parallel_addb (rtx *, rtx); +extern const char *output_call (rtx, rtx, int); +extern const char *output_indirect_call (rtx, rtx); +extern const char *output_millicode_call (rtx, rtx); +extern const char *output_mul_insn (int, rtx); +extern const char *output_div_insn (rtx *, int, rtx); +extern const char *output_mod_insn (int, rtx); +extern const char *singlemove_string (rtx *); +extern void output_arg_descriptor (rtx); +extern void output_global_address (FILE *, rtx, int); +extern void print_operand (FILE *, rtx, int); +extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); +extern void hppa_encode_label (rtx); +extern int arith11_operand (rtx, enum machine_mode); +extern int adddi3_operand (rtx, enum machine_mode); +extern int indexed_memory_operand (rtx, enum machine_mode); +extern int symbolic_expression_p (rtx); +extern int symbolic_memory_operand (rtx, enum machine_mode); +extern bool pa_tls_referenced_p (rtx); +extern int pa_adjust_insn_length (rtx, int); +extern int int11_operand (rtx, enum machine_mode); +extern int reg_or_cint_move_operand (rtx, enum machine_mode); +extern int arith5_operand (rtx, enum machine_mode); +extern int uint5_operand (rtx, enum machine_mode); +extern int pic_label_operand (rtx, enum machine_mode); +extern int plus_xor_ior_operator (rtx, enum machine_mode); +extern int borx_reg_operand (rtx, enum machine_mode); +extern int shadd_operand (rtx, enum machine_mode); +extern int arith_operand (rtx, enum machine_mode); +extern int read_only_operand (rtx, enum machine_mode); +extern int move_dest_operand (rtx, enum machine_mode); +extern int move_src_operand (rtx, enum machine_mode); +extern int 
prefetch_cc_operand (rtx, enum machine_mode); +extern int prefetch_nocc_operand (rtx, enum machine_mode); +extern int and_operand (rtx, enum machine_mode); +extern int arith32_operand (rtx, enum machine_mode); +extern int uint32_operand (rtx, enum machine_mode); +extern int reg_before_reload_operand (rtx, enum machine_mode); +extern int reg_or_0_operand (rtx, enum machine_mode); +extern int reg_or_0_or_nonsymb_mem_operand (rtx, enum machine_mode); +extern int pre_cint_operand (rtx, enum machine_mode); +extern int post_cint_operand (rtx, enum machine_mode); +extern int div_operand (rtx, enum machine_mode); +extern int int5_operand (rtx, enum machine_mode); +extern int movb_comparison_operator (rtx, enum machine_mode); +extern int ireg_or_int5_operand (rtx, enum machine_mode); +extern int fmpyaddoperands (rtx *); +extern int fmpysuboperands (rtx *); +extern int call_operand_address (rtx, enum machine_mode); +extern void emit_bcond_fp (rtx[]); +extern int emit_move_sequence (rtx *, enum machine_mode, rtx); +extern int emit_hpdiv_const (rtx *, int); +extern int is_function_label_plus_const (rtx); +extern int jump_in_call_delay (rtx); +extern int hppa_fpstore_bypass_p (rtx, rtx); +extern int attr_length_millicode_call (rtx); +extern int attr_length_call (rtx, int); +extern int attr_length_indirect_call (rtx); +extern int attr_length_save_restore_dltp (rtx); + +/* Declare functions defined in pa.c and used in templates. */ + +extern struct rtx_def *return_addr_rtx (int, rtx); + +extern int fp_reg_operand (rtx, enum machine_mode); +extern int arith_double_operand (rtx, enum machine_mode); +extern int ireg_operand (rtx, enum machine_mode); +extern int lhs_lshift_operand (rtx, enum machine_mode); +extern int pc_or_label_operand (rtx, enum machine_mode); +#ifdef ARGS_SIZE_RTX +/* expr.h defines ARGS_SIZE_RTX and `enum direction' */ +#ifdef TREE_CODE +extern enum direction function_arg_padding (enum machine_mode, const_tree); +#endif +#endif /* ARGS_SIZE_RTX */ +extern int non_hard_reg_operand (rtx, enum machine_mode); +extern int eq_neq_comparison_operator (rtx, enum machine_mode); +extern int insn_refs_are_delayed (rtx); +extern rtx get_deferred_plabel (rtx); +#endif /* RTX_CODE */ + +extern int integer_store_memory_operand (rtx, enum machine_mode); +extern int ldil_cint_p (HOST_WIDE_INT); +extern int zdepi_cint_p (unsigned HOST_WIDE_INT); + +extern void output_ascii (FILE *, const char *, int); +extern HOST_WIDE_INT compute_frame_size (HOST_WIDE_INT, int *); +extern int and_mask_p (unsigned HOST_WIDE_INT); +extern int cint_ok_for_move (HOST_WIDE_INT); +extern void hppa_expand_prologue (void); +extern void hppa_expand_epilogue (void); +extern bool pa_can_use_return_insn (void); +extern int ior_mask_p (unsigned HOST_WIDE_INT); +extern void compute_zdepdi_operands (unsigned HOST_WIDE_INT, + unsigned *); +#ifdef RTX_CODE +extern const char * output_64bit_and (rtx *); +extern const char * output_64bit_ior (rtx *); +extern int cmpib_comparison_operator (rtx, enum machine_mode); +#endif + + +/* Miscellaneous functions in pa.c. 
*/ +#ifdef TREE_CODE +extern int reloc_needed (tree); +extern bool pa_return_in_memory (const_tree, const_tree); +#endif /* TREE_CODE */ + +extern void pa_asm_output_aligned_bss (FILE *, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern void pa_asm_output_aligned_common (FILE *, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern void pa_asm_output_aligned_local (FILE *, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern void pa_hpux_asm_output_external (FILE *, tree, const char *); +extern bool pa_cannot_change_mode_class (enum machine_mode, enum machine_mode, + enum reg_class); +extern bool pa_modes_tieable_p (enum machine_mode, enum machine_mode); +extern HOST_WIDE_INT pa_initial_elimination_offset (int, int); + +extern const int magic_milli[]; +extern int shadd_constant_p (int); diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c new file mode 100644 index 000000000..8a4445fdc --- /dev/null +++ b/gcc/config/pa/pa.c @@ -0,0 +1,10471 @@ +/* Subroutines for insn-output.c for HPPA. + Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "output.h" +#include "except.h" +#include "expr.h" +#include "optabs.h" +#include "reload.h" +#include "integrate.h" +#include "function.h" +#include "diagnostic-core.h" +#include "ggc.h" +#include "recog.h" +#include "predict.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" +#include "df.h" + +/* Return nonzero if there is a bypass for the output of + OUT_INSN and the fp store IN_INSN. 
*/ +int +hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn) +{ + enum machine_mode store_mode; + enum machine_mode other_mode; + rtx set; + + if (recog_memoized (in_insn) < 0 + || (get_attr_type (in_insn) != TYPE_FPSTORE + && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD) + || recog_memoized (out_insn) < 0) + return 0; + + store_mode = GET_MODE (SET_SRC (PATTERN (in_insn))); + + set = single_set (out_insn); + if (!set) + return 0; + + other_mode = GET_MODE (SET_SRC (set)); + + return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode)); +} + + +#ifndef DO_FRAME_NOTES +#ifdef INCOMING_RETURN_ADDR_RTX +#define DO_FRAME_NOTES 1 +#else +#define DO_FRAME_NOTES 0 +#endif +#endif + +static void pa_option_override (void); +static void copy_reg_pointer (rtx, rtx); +static void fix_range (const char *); +static bool pa_handle_option (size_t, const char *, int); +static int hppa_register_move_cost (enum machine_mode mode, reg_class_t, + reg_class_t); +static int hppa_address_cost (rtx, bool); +static bool hppa_rtx_costs (rtx, int, int, int *, bool); +static inline rtx force_mode (enum machine_mode, rtx); +static void pa_reorg (void); +static void pa_combine_instructions (void); +static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx); +static bool forward_branch_p (rtx); +static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *); +static int compute_movmem_length (rtx); +static int compute_clrmem_length (rtx); +static bool pa_assemble_integer (rtx, unsigned int, int); +static void remove_useless_addtr_insns (int); +static void store_reg (int, HOST_WIDE_INT, int); +static void store_reg_modify (int, int, HOST_WIDE_INT); +static void load_reg (int, HOST_WIDE_INT, int); +static void set_reg_plus_d (int, int, HOST_WIDE_INT, int); +static rtx pa_function_value (const_tree, const_tree, bool); +static rtx pa_libcall_value (enum machine_mode, const_rtx); +static bool pa_function_value_regno_p (const unsigned int); +static void pa_output_function_prologue (FILE *, HOST_WIDE_INT); +static void update_total_code_bytes (unsigned int); +static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT); +static int pa_adjust_cost (rtx, rtx, rtx, int); +static int pa_adjust_priority (rtx, int); +static int pa_issue_rate (void); +static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED; +static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT) + ATTRIBUTE_UNUSED; +static void pa_encode_section_info (tree, rtx, int); +static const char *pa_strip_name_encoding (const char *); +static bool pa_function_ok_for_sibcall (tree, tree); +static void pa_globalize_label (FILE *, const char *) + ATTRIBUTE_UNUSED; +static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree); +#if !defined(USE_COLLECT2) +static void pa_asm_out_constructor (rtx, int); +static void pa_asm_out_destructor (rtx, int); +#endif +static void pa_init_builtins (void); +static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int); +static rtx hppa_builtin_saveregs (void); +static void hppa_va_start (tree, rtx); +static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); +static bool pa_scalar_mode_supported_p (enum machine_mode); +static bool pa_commutative_p (const_rtx x, int outer_code); +static void copy_fp_args (rtx) ATTRIBUTE_UNUSED; +static int length_fp_args (rtx) ATTRIBUTE_UNUSED; +static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode); +static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED; +static inline void 
pa_file_start_space (int) ATTRIBUTE_UNUSED; +static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED; +static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED; +static void pa_elf_file_start (void) ATTRIBUTE_UNUSED; +static void pa_som_file_start (void) ATTRIBUTE_UNUSED; +static void pa_linux_file_start (void) ATTRIBUTE_UNUSED; +static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED; +static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED; +static void output_deferred_plabels (void); +static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED; +#ifdef ASM_OUTPUT_EXTERNAL_REAL +static void pa_hpux_file_end (void); +#endif +#if HPUX_LONG_DOUBLE_LIBRARY +static void pa_hpux_init_libfuncs (void); +#endif +static rtx pa_struct_value_rtx (tree, int); +static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); +static void pa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx pa_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree); +static struct machine_function * pa_init_machine_status (void); +static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t, + enum machine_mode, + secondary_reload_info *); +static void pa_extra_live_on_entry (bitmap); +static enum machine_mode pa_promote_function_mode (const_tree, + enum machine_mode, int *, + const_tree, int); + +static void pa_asm_trampoline_template (FILE *); +static void pa_trampoline_init (rtx, tree, rtx); +static rtx pa_trampoline_adjust_address (rtx); +static rtx pa_delegitimize_address (rtx); +static bool pa_print_operand_punct_valid_p (unsigned char); +static rtx pa_internal_arg_pointer (void); +static bool pa_can_eliminate (const int, const int); +static void pa_conditional_register_usage (void); +static enum machine_mode pa_c_mode_for_suffix (char); +static section *pa_function_section (tree, enum node_frequency, bool, bool); +static unsigned int pa_section_type_flags (tree, const char *, int); + +/* The following extra sections are only used for SOM. */ +static GTY(()) section *som_readonly_data_section; +static GTY(()) section *som_one_only_readonly_data_section; +static GTY(()) section *som_one_only_data_section; + +/* Which cpu we are scheduling for. */ +enum processor_type pa_cpu = TARGET_SCHED_DEFAULT; + +/* The UNIX standard to use for predefines and linking. */ +int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993; + +/* Counts for the number of callee-saved general and floating point + registers which were saved by the current function's prologue. */ +static int gr_saved, fr_saved; + +/* Boolean indicating whether the return pointer was saved by the + current function's prologue. */ +static bool rp_saved; + +static rtx find_addr_reg (rtx); + +/* Keep track of the number of bytes we have output in the CODE subspace + during this compilation so we'll know when to emit inline long-calls. */ +unsigned long total_code_bytes; + +/* The last address of the previous function plus the number of bytes in + associated thunks that have been output. This is used to determine if + a thunk can use an IA-relative branch to reach its target function. */ +static unsigned int last_address; + +/* Variables to handle plabels that we discover are necessary at assembly + output time. 
They are output after the current function. */ +struct GTY(()) deferred_plabel +{ + rtx internal_label; + rtx symbol; +}; +static GTY((length ("n_deferred_plabels"))) struct deferred_plabel * + deferred_plabels; +static size_t n_deferred_plabels = 0; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options pa_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + + +/* Initialize the GCC target structure. */ + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE pa_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE pa_option_optimization_table + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER pa_assemble_integer + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE pa_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE pa_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST pa_adjust_cost +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE pa_issue_rate + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall + +#undef TARGET_COMMUTATIVE_P +#define TARGET_COMMUTATIVE_P pa_commutative_p + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_ASM_FILE_END +#ifdef ASM_OUTPUT_EXTERNAL_REAL +#define TARGET_ASM_FILE_END pa_hpux_file_end +#else +#define TARGET_ASM_FILE_END output_deferred_plabels +#endif + +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p + +#if !defined(USE_COLLECT2) +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor +#endif + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT) +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION pa_handle_option + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS pa_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define 
TARGET_EXPAND_BUILTIN pa_expand_builtin + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS hppa_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hppa_address_cost + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg + +#if HPUX_LONG_DOUBLE_LIBRARY +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs +#endif + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY pa_return_in_memory +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG pa_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary + +#undef TARGET_EXPAND_BUILTIN_SAVEREGS +#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD pa_secondary_reload + +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT pa_trampoline_init +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address +#undef TARGET_INTERNAL_ARG_POINTER +#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE pa_can_eliminate +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage +#undef TARGET_C_MODE_FOR_SUFFIX +#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix +#undef TARGET_ASM_FUNCTION_SECTION +#define TARGET_ASM_FUNCTION_SECTION pa_function_section + +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Parse the -mfixed-range= option string. */ + +static void +fix_range (const char *const_str) +{ + int i, first, last; + char *str, *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. 
The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */ + + i = strlen (const_str); + str = (char *) alloca (i + 1); + memcpy (str, const_str, i + 1); + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning (0, "value of -mfixed-range must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning (0, "unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning (0, "unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning (0, "%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = call_used_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } + + /* Check if all floating point registers have been fixed. */ + for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) + if (!fixed_regs[i]) + break; + + if (i > FP_REG_LAST) + target_flags |= MASK_DISABLE_FPREGS; +} + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + case OPT_mnosnake: + case OPT_mpa_risc_1_0: + case OPT_march_1_0: + target_flags &= ~(MASK_PA_11 | MASK_PA_20); + return true; + + case OPT_msnake: + case OPT_mpa_risc_1_1: + case OPT_march_1_1: + target_flags &= ~MASK_PA_20; + target_flags |= MASK_PA_11; + return true; + + case OPT_mpa_risc_2_0: + case OPT_march_2_0: + target_flags |= MASK_PA_11 | MASK_PA_20; + return true; + + case OPT_mschedule_: + if (strcmp (arg, "8000") == 0) + pa_cpu = PROCESSOR_8000; + else if (strcmp (arg, "7100") == 0) + pa_cpu = PROCESSOR_7100; + else if (strcmp (arg, "700") == 0) + pa_cpu = PROCESSOR_700; + else if (strcmp (arg, "7100LC") == 0) + pa_cpu = PROCESSOR_7100LC; + else if (strcmp (arg, "7200") == 0) + pa_cpu = PROCESSOR_7200; + else if (strcmp (arg, "7300") == 0) + pa_cpu = PROCESSOR_7300; + else + return false; + return true; + + case OPT_mfixed_range_: + fix_range (arg); + return true; + +#if TARGET_HPUX + case OPT_munix_93: + flag_pa_unix = 1993; + return true; +#endif + +#if TARGET_HPUX_10_10 + case OPT_munix_95: + flag_pa_unix = 1995; + return true; +#endif + +#if TARGET_HPUX_11_11 + case OPT_munix_98: + flag_pa_unix = 1998; + return true; +#endif + + default: + return true; + } +} + +/* Implement the TARGET_OPTION_OVERRIDE hook. */ + +static void +pa_option_override (void) +{ + /* Unconditional branches in the delay slot are not compatible with dwarf2 + call frame information. There is no benefit in using this optimization + on PA8000 and later processors. */ + if (pa_cpu >= PROCESSOR_8000 + || (targetm.except_unwind_info (&global_options) == UI_DWARF2 + && flag_exceptions) + || flag_unwind_tables) + target_flags &= ~MASK_JUMP_IN_DELAY; + + if (flag_pic && TARGET_PORTABLE_RUNTIME) + { + warning (0, "PIC code generation is not supported in the portable runtime model"); + } + + if (flag_pic && TARGET_FAST_INDIRECT_CALLS) + { + warning (0, "PIC code generation is not compatible with fast indirect calls"); + } + + if (! 
TARGET_GAS && write_symbols != NO_DEBUG) + { + warning (0, "-g is only supported when using GAS on this processor,"); + warning (0, "-g option disabled"); + write_symbols = NO_DEBUG; + } + + /* We only support the "big PIC" model now. And we always generate PIC + code when in 64bit mode. */ + if (flag_pic == 1 || TARGET_64BIT) + flag_pic = 2; + + /* Disable -freorder-blocks-and-partition as we don't support hot and + cold partitioning. */ + if (flag_reorder_blocks_and_partition) + { + inform (input_location, + "-freorder-blocks-and-partition does not work " + "on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + /* We can't guarantee that .dword is available for 32-bit targets. */ + if (UNITS_PER_WORD == 4) + targetm.asm_out.aligned_op.di = NULL; + + /* The unaligned ops are only available when using GAS. */ + if (!TARGET_GAS) + { + targetm.asm_out.unaligned_op.hi = NULL; + targetm.asm_out.unaligned_op.si = NULL; + targetm.asm_out.unaligned_op.di = NULL; + } + + init_machine_status = pa_init_machine_status; +} + +enum pa_builtins +{ + PA_BUILTIN_COPYSIGNQ, + PA_BUILTIN_FABSQ, + PA_BUILTIN_INFQ, + PA_BUILTIN_HUGE_VALQ, + PA_BUILTIN_max +}; + +static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max]; + +static void +pa_init_builtins (void) +{ +#ifdef DONT_HAVE_FPUTC_UNLOCKED + built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = + built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED]; + implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] + = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED]; +#endif +#if TARGET_HPUX_11 + if (built_in_decls [BUILT_IN_FINITE]) + set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite"); + if (built_in_decls [BUILT_IN_FINITEF]) + set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef"); +#endif + + if (HPUX_LONG_DOUBLE_LIBRARY) + { + tree decl, ftype; + + /* Under HPUX, the __float128 type is a synonym for "long double". */ + (*lang_hooks.types.register_builtin_type) (long_double_type_node, + "__float128"); + + /* TFmode support builtins. 
*/ + ftype = build_function_type_list (long_double_type_node, + long_double_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_fabsq", ftype, + PA_BUILTIN_FABSQ, BUILT_IN_MD, + "_U_Qfabs", NULL_TREE); + TREE_READONLY (decl) = 1; + pa_builtins[PA_BUILTIN_FABSQ] = decl; + + ftype = build_function_type_list (long_double_type_node, + long_double_type_node, + long_double_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_copysignq", ftype, + PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD, + "_U_Qfcopysign", NULL_TREE); + TREE_READONLY (decl) = 1; + pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl; + + ftype = build_function_type (long_double_type_node, void_list_node); + decl = add_builtin_function ("__builtin_infq", ftype, + PA_BUILTIN_INFQ, BUILT_IN_MD, + NULL, NULL_TREE); + pa_builtins[PA_BUILTIN_INFQ] = decl; + + decl = add_builtin_function ("__builtin_huge_valq", ftype, + PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD, + NULL, NULL_TREE); + pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl; + } +} + +static rtx +pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + switch (fcode) + { + case PA_BUILTIN_FABSQ: + case PA_BUILTIN_COPYSIGNQ: + return expand_call (exp, target, ignore); + + case PA_BUILTIN_INFQ: + case PA_BUILTIN_HUGE_VALQ: + { + enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); + REAL_VALUE_TYPE inf; + rtx tmp; + + real_inf (&inf); + tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode); + + tmp = validize_mem (force_const_mem (target_mode, tmp)); + + if (target == 0) + target = gen_reg_rtx (target_mode); + + emit_move_insn (target, tmp); + return target; + } + + default: + gcc_unreachable (); + } + + return NULL_RTX; +} + +/* Function to init struct machine_function. + This will be called, via a pointer variable, + from push_function_context. */ + +static struct machine_function * +pa_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* If FROM is a probable pointer register, mark TO as a probable + pointer register with the same pointer alignment as FROM. */ + +static void +copy_reg_pointer (rtx to, rtx from) +{ + if (REG_POINTER (from)) + mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from))); +} + +/* Return 1 if X contains a symbolic expression. We know these + expressions will have one of a few well defined forms, so + we need only check those forms. */ +int +symbolic_expression_p (rtx x) +{ + + /* Strip off any HIGH. */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + return (symbolic_operand (x, VOIDmode)); +} + +/* Accept any constant that can be moved in one instruction into a + general register. */ +int +cint_ok_for_move (HOST_WIDE_INT ival) +{ + /* OK if ldo, ldil, or zdepi, can be used. */ + return (VAL_14_BITS_P (ival) + || ldil_cint_p (ival) + || zdepi_cint_p (ival)); +} + +/* Return truth value of whether OP can be used as an operand in a + adddi3 insn. */ +int +adddi3_operand (rtx op, enum machine_mode mode) +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT + && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op)))); +} + +/* True iff the operand OP can be used as the destination operand of + an integer store. This also implies the operand could be used as + the source operand of an integer load. Symbolic, lo_sum and indexed + memory operands are not allowed. 
We accept reloading pseudos and + other memory operands. */ +int +integer_store_memory_operand (rtx op, enum machine_mode mode) +{ + return ((reload_in_progress + && REG_P (op) + && REGNO (op) >= FIRST_PSEUDO_REGISTER + && reg_renumber [REGNO (op)] < 0) + || (GET_CODE (op) == MEM + && (reload_in_progress || memory_address_p (mode, XEXP (op, 0))) + && !symbolic_memory_operand (op, VOIDmode) + && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0)) + && !IS_INDEX_ADDR_P (XEXP (op, 0)))); +} + +/* True iff ldil can be used to load this CONST_INT. The least + significant 11 bits of the value must be zero and the value must + not change sign when extended from 32 to 64 bits. */ +int +ldil_cint_p (HOST_WIDE_INT ival) +{ + HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff); + + return x == 0 || x == ((HOST_WIDE_INT) -1 << 31); +} + +/* True iff zdepi can be used to generate this CONST_INT. + zdepi first sign extends a 5-bit signed number to a given field + length, then places this field anywhere in a zero. */ +int +zdepi_cint_p (unsigned HOST_WIDE_INT x) +{ + unsigned HOST_WIDE_INT lsb_mask, t; + + /* This might not be obvious, but it's at least fast. + This function is critical; we don't have the time loops would take. */ + lsb_mask = x & -x; + t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1); + /* Return true iff t is a power of two. */ + return ((t & (t - 1)) == 0); +} + +/* True iff depi or extru can be used to compute (reg & mask). + Accept bit pattern like these: + 0....01....1 + 1....10....0 + 1..10..01..1 */ +int +and_mask_p (unsigned HOST_WIDE_INT mask) +{ + mask = ~mask; + mask += mask & -mask; + return (mask & (mask - 1)) == 0; +} + +/* True iff depi can be used to compute (reg | MASK). */ +int +ior_mask_p (unsigned HOST_WIDE_INT mask) +{ + mask += mask & -mask; + return (mask & (mask - 1)) == 0; +} + +/* Legitimize PIC addresses. If the address is already + position-independent, we return ORIG. Newly generated + position-independent addresses go to REG. If we need more + than one register, we lose. */ + +rtx +legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + rtx pic_ref = orig; + + gcc_assert (!PA_SYMBOL_REF_TLS_P (orig)); + + /* Labels need special handling. */ + if (pic_label_operand (orig, mode)) + { + rtx insn; + + /* We do not want to go through the movXX expanders here since that + would create recursion. + + Nor do we really want to call a generator for a named pattern + since that requires multiple patterns if we want to support + multiple word sizes. + + So instead we just emit the raw set, which avoids the movXX + expanders completely. */ + mark_reg_pointer (reg, BITS_PER_UNIT); + insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig)); + + /* Put a REG_EQUAL note on this insn, so that it can be optimized. */ + add_reg_note (insn, REG_EQUAL, orig); + + /* During and after reload, we need to generate a REG_LABEL_OPERAND note + and update LABEL_NUSES because this is not done automatically. */ + if (reload_in_progress || reload_completed) + { + /* Extract LABEL_REF. */ + if (GET_CODE (orig) == CONST) + orig = XEXP (XEXP (orig, 0), 0); + /* Extract CODE_LABEL. */ + orig = XEXP (orig, 0); + add_reg_note (insn, REG_LABEL_OPERAND, orig); + LABEL_NUSES (orig)++; + } + crtl->uses_pic_offset_table = 1; + return reg; + } + if (GET_CODE (orig) == SYMBOL_REF) + { + rtx insn, tmp_reg; + + gcc_assert (reg); + + /* Before reload, allocate a temporary register for the intermediate + result. 
This allows the sequence to be deleted when the final + result is unused and the insns are trivially dead. */ + tmp_reg = ((reload_in_progress || reload_completed) + ? reg : gen_reg_rtx (Pmode)); + + if (function_label_operand (orig, mode)) + { + /* Force function label into memory in word mode. */ + orig = XEXP (force_const_mem (word_mode, orig), 0); + /* Load plabel address from DLT. */ + emit_move_insn (tmp_reg, + gen_rtx_PLUS (word_mode, pic_offset_table_rtx, + gen_rtx_HIGH (word_mode, orig))); + pic_ref + = gen_const_mem (Pmode, + gen_rtx_LO_SUM (Pmode, tmp_reg, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, orig), + UNSPEC_DLTIND14R))); + emit_move_insn (reg, pic_ref); + /* Now load address of function descriptor. */ + pic_ref = gen_rtx_MEM (Pmode, reg); + } + else + { + /* Load symbol reference from DLT. */ + emit_move_insn (tmp_reg, + gen_rtx_PLUS (word_mode, pic_offset_table_rtx, + gen_rtx_HIGH (word_mode, orig))); + pic_ref + = gen_const_mem (Pmode, + gen_rtx_LO_SUM (Pmode, tmp_reg, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, orig), + UNSPEC_DLTIND14R))); + } + + crtl->uses_pic_offset_table = 1; + mark_reg_pointer (reg, BITS_PER_UNIT); + insn = emit_move_insn (reg, pic_ref); + + /* Put a REG_EQUAL note on this insn, so that it can be optimized. */ + set_unique_reg_note (insn, REG_EQUAL, orig); + + return reg; + } + else if (GET_CODE (orig) == CONST) + { + rtx base; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) + return orig; + + gcc_assert (reg); + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + + if (GET_CODE (orig) == CONST_INT) + { + if (INT_14_BITS (orig)) + return plus_constant (base, INTVAL (orig)); + orig = force_reg (Pmode, orig); + } + pic_ref = gen_rtx_PLUS (Pmode, base, orig); + /* Likewise, should we set special REG_NOTEs here? 
*/ + } + + return pic_ref; +} + +static GTY(()) rtx gen_tls_tga; + +static rtx +gen_tls_get_addr (void) +{ + if (!gen_tls_tga) + gen_tls_tga = init_one_libfunc ("__tls_get_addr"); + return gen_tls_tga; +} + +static rtx +hppa_tls_call (rtx arg) +{ + rtx ret; + + ret = gen_reg_rtx (Pmode); + emit_library_call_value (gen_tls_get_addr (), ret, + LCT_CONST, Pmode, 1, arg, Pmode); + + return ret; +} + +static rtx +legitimize_tls_address (rtx addr) +{ + rtx ret, insn, tmp, t1, t2, tp; + enum tls_model model = SYMBOL_REF_TLS_MODEL (addr); + + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + tmp = gen_reg_rtx (Pmode); + if (flag_pic) + emit_insn (gen_tgd_load_pic (tmp, addr)); + else + emit_insn (gen_tgd_load (tmp, addr)); + ret = hppa_tls_call (tmp); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + ret = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (Pmode); + start_sequence (); + if (flag_pic) + emit_insn (gen_tld_load_pic (tmp, addr)); + else + emit_insn (gen_tld_load (tmp, addr)); + t1 = hppa_tls_call (tmp); + insn = get_insns (); + end_sequence (); + t2 = gen_reg_rtx (Pmode); + emit_libcall_block (insn, t2, t1, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLDBASE)); + emit_insn (gen_tld_offset_load (ret, addr, t2)); + break; + + case TLS_MODEL_INITIAL_EXEC: + tp = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (Pmode); + ret = gen_reg_rtx (Pmode); + emit_insn (gen_tp_load (tp)); + if (flag_pic) + emit_insn (gen_tie_load_pic (tmp, addr)); + else + emit_insn (gen_tie_load (tmp, addr)); + emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp)); + break; + + case TLS_MODEL_LOCAL_EXEC: + tp = gen_reg_rtx (Pmode); + ret = gen_reg_rtx (Pmode); + emit_insn (gen_tp_load (tp)); + emit_insn (gen_tle_load (ret, addr, tp)); + break; + + default: + gcc_unreachable (); + } + + return ret; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. + + For the PA, transform: + + memory(X + <large int>) + + into: + + if (<large int> & mask) >= 16 + Y = (<large int> & ~mask) + mask + 1 Round up. + else + Y = (<large int> & ~mask) Round down. + Z = X + Y + memory (Z + (<large int> - Y)); + + This is for CSE to find several similar references, and only use one Z. + + X can either be a SYMBOL_REF or REG, but because combine cannot + perform a 4->2 combination we do nothing for SYMBOL_REF + D where + D will not fit in 14 bits. + + MODE_FLOAT references allow displacements which fit in 5 bits, so use + 0x1f as the mask. + + MODE_INT references allow displacements which fit in 14 bits, so use + 0x3fff as the mask. + + This relies on the fact that most mode MODE_FLOAT references will use FP + registers and most mode MODE_INT references will use integer registers. + (In the rare case of an FP register used in an integer MODE, we depend + on secondary reloads to clean things up.) + + + It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special + manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed + addressing modes to be used). + + Put X and Z into registers. Then put the entire expression into + a register. 
*/ + +rtx +hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx orig = x; + + /* We need to canonicalize the order of operands in unscaled indexed + addresses since the code that checks if an address is valid doesn't + always try both orders. */ + if (!TARGET_NO_SPACE_REGS + && GET_CODE (x) == PLUS + && GET_MODE (x) == Pmode + && REG_P (XEXP (x, 0)) + && REG_P (XEXP (x, 1)) + && REG_POINTER (XEXP (x, 0)) + && !REG_POINTER (XEXP (x, 1))) + return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0)); + + if (PA_SYMBOL_REF_TLS_P (x)) + return legitimize_tls_address (x); + else if (flag_pic) + return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode)); + + /* Strip off CONST. */ + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + + /* Special case. Get the SYMBOL_REF into a register and use indexing. + That should always be safe. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == SYMBOL_REF) + { + rtx reg = force_reg (Pmode, XEXP (x, 1)); + return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0))); + } + + /* Note we must reject symbols which represent function addresses + since the assembler/linker can't handle arithmetic on plabels. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == CONST_INT + && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0))) + || GET_CODE (XEXP (x, 0)) == REG)) + { + rtx int_part, ptr_reg; + int newoffset; + int offset = INTVAL (XEXP (x, 1)); + int mask; + + mask = (GET_MODE_CLASS (mode) == MODE_FLOAT + ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff); + + /* Choose which way to round the offset. Round up if we + are >= halfway to the next boundary. */ + if ((offset & mask) >= ((mask + 1) / 2)) + newoffset = (offset & ~ mask) + mask + 1; + else + newoffset = (offset & ~ mask); + + /* If the newoffset will not fit in 14 bits (ldo), then + handling this would take 4 or 5 instructions (2 to load + the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to + add the new offset and the SYMBOL_REF.) Combine can + not handle 4->2 or 5->2 combinations, so do not create + them. */ + if (! VAL_14_BITS_P (newoffset) + && GET_CODE (XEXP (x, 0)) == SYMBOL_REF) + { + rtx const_part = plus_constant (XEXP (x, 0), newoffset); + rtx tmp_reg + = force_reg (Pmode, + gen_rtx_HIGH (Pmode, const_part)); + ptr_reg + = force_reg (Pmode, + gen_rtx_LO_SUM (Pmode, + tmp_reg, const_part)); + } + else + { + if (! VAL_14_BITS_P (newoffset)) + int_part = force_reg (Pmode, GEN_INT (newoffset)); + else + int_part = GEN_INT (newoffset); + + ptr_reg = force_reg (Pmode, + gen_rtx_PLUS (Pmode, + force_reg (Pmode, XEXP (x, 0)), + int_part)); + } + return plus_constant (ptr_reg, offset - newoffset); + } + + /* Handle (plus (mult (a) (shadd_constant)) (b)). 
*/ + + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))) + && (OBJECT_P (XEXP (x, 1)) + || GET_CODE (XEXP (x, 1)) == SUBREG) + && GET_CODE (XEXP (x, 1)) != CONST) + { + int val = INTVAL (XEXP (XEXP (x, 0), 1)); + rtx reg1, reg2; + + reg1 = XEXP (x, 1); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + reg2, + GEN_INT (val)), + reg1)); + } + + /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)). + + Only do so for floating point modes since this is more speculative + and we lose if it's an integer store. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1))) + && (mode == SFmode || mode == DFmode)) + { + + /* First, try and figure out what to use as a base register. */ + rtx reg1, reg2, base, idx; + + reg1 = XEXP (XEXP (x, 0), 1); + reg2 = XEXP (x, 1); + base = NULL_RTX; + idx = NULL_RTX; + + /* Make sure they're both regs. If one was a SYMBOL_REF [+ const], + then emit_move_sequence will turn on REG_POINTER so we'll know + it's a base register below. */ + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + /* Figure out what the base and index are. */ + + if (GET_CODE (reg1) == REG + && REG_POINTER (reg1)) + { + base = reg1; + idx = gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + XEXP (XEXP (XEXP (x, 0), 0), 0), + XEXP (XEXP (XEXP (x, 0), 0), 1)), + XEXP (x, 1)); + } + else if (GET_CODE (reg2) == REG + && REG_POINTER (reg2)) + { + base = reg2; + idx = XEXP (x, 0); + } + + if (base == 0) + return orig; + + /* If the index adds a large constant, try to scale the + constant so that it can be loaded with only one insn. */ + if (GET_CODE (XEXP (idx, 1)) == CONST_INT + && VAL_14_BITS_P (INTVAL (XEXP (idx, 1)) + / INTVAL (XEXP (XEXP (idx, 0), 1))) + && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0) + { + /* Divide the CONST_INT by the scale factor, then add it to A. */ + int val = INTVAL (XEXP (idx, 1)); + + val /= INTVAL (XEXP (XEXP (idx, 0), 1)); + reg1 = XEXP (XEXP (idx, 0), 0); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val))); + + /* We can now generate a simple scaled indexed address. */ + return + force_reg + (Pmode, gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg1, + XEXP (XEXP (idx, 0), 1)), + base)); + } + + /* If B + C is still a valid base register, then add them. 
*/
+ if (GET_CODE (XEXP (idx, 1)) == CONST_INT
+ && INTVAL (XEXP (idx, 1)) <= 4096
+ && INTVAL (XEXP (idx, 1)) >= -4096)
+ {
+ int val = INTVAL (XEXP (XEXP (idx, 0), 1));
+ rtx reg1, reg2;
+
+ reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
+
+ reg2 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg2) != CONST_INT)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode,
+ reg2,
+ GEN_INT (val)),
+ reg1));
+ }
+
+ /* Get the index into a register, then add the base + index and
+ return a register holding the result. */
+
+ /* First get A into a register. */
+ reg1 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ /* And get B into a register. */
+ reg2 = XEXP (idx, 1);
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ reg1 = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg1,
+ XEXP (XEXP (idx, 0), 1)),
+ reg2));
+
+ /* Add the result to our base register and return. */
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
+
+ }
+
+ /* Uh-oh. We might have an address for x[n-100000]. This needs
+ special handling to avoid creating an indexed memory address
+ with x-100000 as the base.
+
+ If the constant part is small enough, then it's still safe because
+ there is a guard page at the beginning and end of the data segment.
+
+ Scaled references are common enough that we want to try and rearrange the
+ terms so that we can use indexing for these addresses too. Only
+ do the optimization for floating point modes. */
+
+ if (GET_CODE (x) == PLUS
+ && symbolic_expression_p (XEXP (x, 1)))
+ {
+ /* Ugly. We modify things here so that the address offset specified
+ by the index expression is computed first, then added to x to form
+ the entire address. */
+
+ rtx regx1, regx2, regy1, regy2, y;
+
+ /* Strip off any CONST. */
+ y = XEXP (x, 1);
+ if (GET_CODE (y) == CONST)
+ y = XEXP (y, 0);
+
+ if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
+ {
+ /* See if this looks like
+ (plus (mult (reg) (shadd_const))
+ (const (plus (symbol_ref) (const_int))))
+
+ Where const_int is small. In that case the const
+ expression is a valid pointer for indexing.
+
+ If const_int is big, but can be divided evenly by shadd_const
+ and added to (reg). This allows more scaled indexed addresses.
*/ + if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF + && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) >= -4096 + && INTVAL (XEXP (y, 1)) <= 4095 + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) + { + int val = INTVAL (XEXP (XEXP (x, 0), 1)); + rtx reg1, reg2; + + reg1 = XEXP (x, 1); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + reg2, + GEN_INT (val)), + reg1)); + } + else if ((mode == DFmode || mode == SFmode) + && GET_CODE (XEXP (y, 0)) == SYMBOL_REF + && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0 + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) + { + regx1 + = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1)) + / INTVAL (XEXP (XEXP (x, 0), 1)))); + regx2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (regx2) != REG) + regx2 = force_reg (Pmode, force_operand (regx2, 0)); + regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode, + regx2, regx1)); + return + force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, regx2, + XEXP (XEXP (x, 0), 1)), + force_reg (Pmode, XEXP (y, 0)))); + } + else if (GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) >= -4096 + && INTVAL (XEXP (y, 1)) <= 4095) + { + /* This is safe because of the guard page at the + beginning and end of the data space. Just + return the original address. */ + return orig; + } + else + { + /* Doesn't look like one we can optimize. */ + regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0)); + regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0)); + regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0)); + regx1 = force_reg (Pmode, + gen_rtx_fmt_ee (GET_CODE (y), Pmode, + regx1, regy2)); + return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1)); + } + } + } + + return orig; +} + +/* Implement the TARGET_REGISTER_MOVE_COST hook. + + Compute extra cost of moving data between one register class + and another. + + Make moves from SAR so expensive they should never happen. We used to + have 0xffff here, but that generates overflow in rare cases. + + Copies involving a FP register and a non-FP register are relatively + expensive because they must go through memory. + + Other copies are reasonably cheap. */ + +static int +hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + if (from == SHIFT_REGS) + return 0x100; + else if (to == SHIFT_REGS && FP_REG_CLASS_P (from)) + return 18; + else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to)) + || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from))) + return 16; + else + return 2; +} + +/* For the HPPA, REG and REG+CONST is cost 0 + and addresses involving symbolic constants are cost 2. + + PIC addresses are very expensive. + + It is no coincidence that this has the same structure + as GO_IF_LEGITIMATE_ADDRESS. */ + +static int +hppa_address_cost (rtx X, + bool speed ATTRIBUTE_UNUSED) +{ + switch (GET_CODE (X)) + { + case REG: + case PLUS: + case LO_SUM: + return 1; + case HIGH: + return 2; + default: + return 4; + } +} + +/* Compute a (partial) cost for rtx X. 
Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +hppa_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + case CONST_INT: + if (INTVAL (x) == 0) + *total = 0; + else if (INT_14_BITS (x)) + *total = 1; + else + *total = 2; + return true; + + case HIGH: + *total = 2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = 4; + return true; + + case CONST_DOUBLE: + if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode)) + && outer_code != SET) + *total = 0; + else + *total = 8; + return true; + + case MULT: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + *total = COSTS_N_INSNS (3); + else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT) + *total = COSTS_N_INSNS (8); + else + *total = COSTS_N_INSNS (20); + return true; + + case DIV: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + *total = COSTS_N_INSNS (14); + return true; + } + /* FALLTHRU */ + + case UDIV: + case MOD: + case UMOD: + *total = COSTS_N_INSNS (60); + return true; + + case PLUS: /* this includes shNadd insns */ + case MINUS: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + *total = COSTS_N_INSNS (3); + else + *total = COSTS_N_INSNS (1); + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (1); + return true; + + default: + return false; + } +} + +/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a + new rtx with the correct mode. */ +static inline rtx +force_mode (enum machine_mode mode, rtx orig) +{ + if (mode == GET_MODE (orig)) + return orig; + + gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER); + + return gen_rtx_REG (mode, REGNO (orig)); +} + +/* Return 1 if *X is a thread-local symbol. */ + +static int +pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + return PA_SYMBOL_REF_TLS_P (*x); +} + +/* Return 1 if X contains a thread-local symbol. */ + +bool +pa_tls_referenced_p (rtx x) +{ + if (!TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0); +} + +/* Emit insns to move operands[1] into operands[0]. + + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move + normally. + + Note SCRATCH_REG may not be in the proper mode depending on how it + will be used. This routine is responsible for creating a new copy + of SCRATCH_REG in the proper mode. */ + +int +emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg) +{ + register rtx operand0 = operands[0]; + register rtx operand1 = operands[1]; + register rtx tem; + + /* We can only handle indexed addresses in the destination operand + of floating point stores. Thus, we need to break out indexed + addresses from the destination operand. */ + if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0))) + { + gcc_assert (can_create_pseudo_p ()); + + tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0)); + operand0 = replace_equiv_address (operand0, tem); + } + + /* On targets with non-equivalent space registers, break out unscaled + indexed addresses from the source operand before the final CSE. + We have to do this because the REG_POINTER flag is not correctly + carried through various optimization passes and CSE may substitute + a pseudo without the pointer set for one with the pointer set. 
As
+ a result, we lose various opportunities to create insns with
+ unscaled indexed addresses. */
+ if (!TARGET_NO_SPACE_REGS
+ && !cse_not_expected
+ && GET_CODE (operand1) == MEM
+ && GET_CODE (XEXP (operand1, 0)) == PLUS
+ && REG_P (XEXP (XEXP (operand1, 0), 0))
+ && REG_P (XEXP (XEXP (operand1, 0), 1)))
+ operand1
+ = replace_equiv_address (operand1,
+ copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == REG
+ && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ operand0 = reg_equiv_mem[REGNO (operand0)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == SUBREG
+ && GET_CODE (SUBREG_REG (operand0)) == REG
+ && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
+ {
+ /* We must not alter SUBREG_BYTE (operand0) since that would confuse
+ the code which tracks sets/uses for delete_output_reload. */
+ rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
+ reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
+ SUBREG_BYTE (operand0));
+ operand0 = alter_subreg (&temp);
+ }
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == REG
+ && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
+ operand1 = reg_equiv_mem[REGNO (operand1)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == SUBREG
+ && GET_CODE (SUBREG_REG (operand1)) == REG
+ && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
+ {
+ /* We must not alter SUBREG_BYTE (operand0) since that would confuse
+ the code which tracks sets/uses for delete_output_reload. */
+ rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
+ reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
+ SUBREG_BYTE (operand1));
+ operand1 = alter_subreg (&temp);
+ }
+
+ if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
+ && ((tem = find_replacement (&XEXP (operand0, 0)))
+ != XEXP (operand0, 0)))
+ operand0 = replace_equiv_address (operand0, tem);
+
+ if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
+ && ((tem = find_replacement (&XEXP (operand1, 0)))
+ != XEXP (operand1, 0)))
+ operand1 = replace_equiv_address (operand1, tem);
+
+ /* Handle secondary reloads for loads/stores of FP registers from
+ REG+D addresses where D does not fit in 5 or 14 bits, including
+ (subreg (mem (addr))) cases. */
+ if (scratch_reg
+ && fp_reg_operand (operand0, mode)
+ && ((GET_CODE (operand1) == MEM
+ && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
+ XEXP (operand1, 0)))
+ || ((GET_CODE (operand1) == SUBREG
+ && GET_CODE (XEXP (operand1, 0)) == MEM
+ && !memory_address_p ((GET_MODE_SIZE (mode) == 4
+ ? SFmode : DFmode),
+ XEXP (XEXP (operand1, 0), 0))))))
+ {
+ if (GET_CODE (operand1) == SUBREG)
+ operand1 = XEXP (operand1, 0);
+
+ /* SCRATCH_REG will hold an address and maybe the actual data. We want
+ it in WORD_MODE regardless of what mode it was originally given
+ to us. */
+ scratch_reg = force_mode (word_mode, scratch_reg);
+
+ /* D might not fit in 14 bits either; for such cases load D into
+ scratch reg.
*/ + if (!memory_address_p (Pmode, XEXP (operand1, 0))) + { + emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); + emit_move_insn (scratch_reg, + gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)), + Pmode, + XEXP (XEXP (operand1, 0), 0), + scratch_reg)); + } + else + emit_move_insn (scratch_reg, XEXP (operand1, 0)); + emit_insn (gen_rtx_SET (VOIDmode, operand0, + replace_equiv_address (operand1, scratch_reg))); + return 1; + } + else if (scratch_reg + && fp_reg_operand (operand1, mode) + && ((GET_CODE (operand0) == MEM + && !memory_address_p ((GET_MODE_SIZE (mode) == 4 + ? SFmode : DFmode), + XEXP (operand0, 0))) + || ((GET_CODE (operand0) == SUBREG) + && GET_CODE (XEXP (operand0, 0)) == MEM + && !memory_address_p ((GET_MODE_SIZE (mode) == 4 + ? SFmode : DFmode), + XEXP (XEXP (operand0, 0), 0))))) + { + if (GET_CODE (operand0) == SUBREG) + operand0 = XEXP (operand0, 0); + + /* SCRATCH_REG will hold an address and maybe the actual data. We want + it in WORD_MODE regardless of what mode it was originally given + to us. */ + scratch_reg = force_mode (word_mode, scratch_reg); + + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. */ + if (!memory_address_p (Pmode, XEXP (operand0, 0))) + { + emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1)); + emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, + 0)), + Pmode, + XEXP (XEXP (operand0, 0), + 0), + scratch_reg)); + } + else + emit_move_insn (scratch_reg, XEXP (operand0, 0)); + emit_insn (gen_rtx_SET (VOIDmode, + replace_equiv_address (operand0, scratch_reg), + operand1)); + return 1; + } + /* Handle secondary reloads for loads of FP registers from constant + expressions by forcing the constant into memory. + + Use scratch_reg to hold the address of the memory location. + + The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return + NO_REGS when presented with a const_int and a register class + containing only FP registers. Doing so unfortunately creates + more problems than it solves. Fix this for 2.5. */ + else if (scratch_reg + && CONSTANT_P (operand1) + && fp_reg_operand (operand0, mode)) + { + rtx const_mem, xoperands[2]; + + /* SCRATCH_REG will hold an address and maybe the actual data. We want + it in WORD_MODE regardless of what mode it was originally given + to us. */ + scratch_reg = force_mode (word_mode, scratch_reg); + + /* Force the constant into memory and put the address of the + memory location into scratch_reg. */ + const_mem = force_const_mem (mode, operand1); + xoperands[0] = scratch_reg; + xoperands[1] = XEXP (const_mem, 0); + emit_move_sequence (xoperands, Pmode, 0); + + /* Now load the destination register. */ + emit_insn (gen_rtx_SET (mode, operand0, + replace_equiv_address (const_mem, scratch_reg))); + return 1; + } + /* Handle secondary reloads for SAR. These occur when trying to load + the SAR from memory or a constant. */ + else if (scratch_reg + && GET_CODE (operand0) == REG + && REGNO (operand0) < FIRST_PSEUDO_REGISTER + && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS + && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT)) + { + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. */ + if (GET_CODE (operand1) == MEM + && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0))) + { + /* We are reloading the address into the scratch register, so we + want to make sure the scratch register is a full register. 
*/ + scratch_reg = force_mode (word_mode, scratch_reg); + + emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); + emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, + 0)), + Pmode, + XEXP (XEXP (operand1, 0), + 0), + scratch_reg)); + + /* Now we are going to load the scratch register from memory, + we want to load it in the same width as the original MEM, + which must be the same as the width of the ultimate destination, + OPERAND0. */ + scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); + + emit_move_insn (scratch_reg, + replace_equiv_address (operand1, scratch_reg)); + } + else + { + /* We want to load the scratch register using the same mode as + the ultimate destination. */ + scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); + + emit_move_insn (scratch_reg, operand1); + } + + /* And emit the insn to set the ultimate destination. We know that + the scratch register has the same mode as the destination at this + point. */ + emit_move_insn (operand0, scratch_reg); + return 1; + } + /* Handle the most common case: storing into a register. */ + else if (register_operand (operand0, mode)) + { + /* Legitimize TLS symbol references. This happens for references + that aren't a legitimate constant. */ + if (PA_SYMBOL_REF_TLS_P (operand1)) + operand1 = legitimize_tls_address (operand1); + + if (register_operand (operand1, mode) + || (GET_CODE (operand1) == CONST_INT + && cint_ok_for_move (INTVAL (operand1))) + || (operand1 == CONST0_RTX (mode)) + || (GET_CODE (operand1) == HIGH + && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) + /* Only `general_operands' can come here, so MEM is ok. */ + || GET_CODE (operand1) == MEM) + { + /* Various sets are created during RTL generation which don't + have the REG_POINTER flag correctly set. After the CSE pass, + instruction recognition can fail if we don't consistently + set this flag when performing register copies. This should + also improve the opportunities for creating insns that use + unscaled indexing. */ + if (REG_P (operand0) && REG_P (operand1)) + { + if (REG_POINTER (operand1) + && !REG_POINTER (operand0) + && !HARD_REGISTER_P (operand0)) + copy_reg_pointer (operand0, operand1); + } + + /* When MEMs are broken out, the REG_POINTER flag doesn't + get set. In some cases, we can set the REG_POINTER flag + from the declaration for the MEM. */ + if (REG_P (operand0) + && GET_CODE (operand1) == MEM + && !REG_POINTER (operand0)) + { + tree decl = MEM_EXPR (operand1); + + /* Set the register pointer flag and register alignment + if the declaration for this memory reference is a + pointer type. */ + if (decl) + { + tree type; + + /* If this is a COMPONENT_REF, use the FIELD_DECL from + tree operand 1. */ + if (TREE_CODE (decl) == COMPONENT_REF) + decl = TREE_OPERAND (decl, 1); + + type = TREE_TYPE (decl); + type = strip_array_types (type); + + if (POINTER_TYPE_P (type)) + { + int align; + + type = TREE_TYPE (type); + /* Using TYPE_ALIGN_OK is rather conservative as + only the ada frontend actually sets it. */ + align = (TYPE_ALIGN_OK (type) ? 
TYPE_ALIGN (type) + : BITS_PER_UNIT); + mark_reg_pointer (operand0, align); + } + } + } + + emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); + return 1; + } + } + else if (GET_CODE (operand0) == MEM) + { + if (mode == DFmode && operand1 == CONST0_RTX (mode) + && !(reload_in_progress || reload_completed)) + { + rtx temp = gen_reg_rtx (DFmode); + + emit_insn (gen_rtx_SET (VOIDmode, temp, operand1)); + emit_insn (gen_rtx_SET (VOIDmode, operand0, temp)); + return 1; + } + if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) + { + /* Run this case quickly. */ + emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); + return 1; + } + if (! (reload_in_progress || reload_completed)) + { + operands[0] = validize_mem (operand0); + operands[1] = operand1 = force_reg (mode, operand1); + } + } + + /* Simplify the source if we need to. + Note we do have to handle function labels here, even though we do + not consider them legitimate constants. Loop optimizations can + call the emit_move_xxx with one as a source. */ + if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) + || function_label_operand (operand1, mode) + || (GET_CODE (operand1) == HIGH + && symbolic_operand (XEXP (operand1, 0), mode))) + { + int ishighonly = 0; + + if (GET_CODE (operand1) == HIGH) + { + ishighonly = 1; + operand1 = XEXP (operand1, 0); + } + if (symbolic_operand (operand1, mode)) + { + /* Argh. The assembler and linker can't handle arithmetic + involving plabels. + + So we force the plabel into memory, load operand0 from + the memory location, then add in the constant part. */ + if ((GET_CODE (operand1) == CONST + && GET_CODE (XEXP (operand1, 0)) == PLUS + && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode)) + || function_label_operand (operand1, mode)) + { + rtx temp, const_part; + + /* Figure out what (if any) scratch register to use. */ + if (reload_in_progress || reload_completed) + { + scratch_reg = scratch_reg ? scratch_reg : operand0; + /* SCRATCH_REG will hold an address and maybe the actual + data. We want it in WORD_MODE regardless of what mode it + was originally given to us. */ + scratch_reg = force_mode (word_mode, scratch_reg); + } + else if (flag_pic) + scratch_reg = gen_reg_rtx (Pmode); + + if (GET_CODE (operand1) == CONST) + { + /* Save away the constant part of the expression. */ + const_part = XEXP (XEXP (operand1, 0), 1); + gcc_assert (GET_CODE (const_part) == CONST_INT); + + /* Force the function label into memory. */ + temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); + } + else + { + /* No constant part. */ + const_part = NULL_RTX; + + /* Force the function label into memory. */ + temp = force_const_mem (mode, operand1); + } + + + /* Get the address of the memory location. PIC-ify it if + necessary. */ + temp = XEXP (temp, 0); + if (flag_pic) + temp = legitimize_pic_address (temp, mode, scratch_reg); + + /* Put the address of the memory location into our destination + register. */ + operands[1] = temp; + emit_move_sequence (operands, mode, scratch_reg); + + /* Now load from the memory location into our destination + register. */ + operands[1] = gen_rtx_MEM (Pmode, operands[0]); + emit_move_sequence (operands, mode, scratch_reg); + + /* And add back in the constant part. */ + if (const_part != NULL_RTX) + expand_inc (operand0, const_part); + + return 1; + } + + if (flag_pic) + { + rtx temp; + + if (reload_in_progress || reload_completed) + { + temp = scratch_reg ? 
scratch_reg : operand0;
+ /* TEMP will hold an address and maybe the actual
+ data. We want it in WORD_MODE regardless of what mode it
+ was originally given to us. */
+ temp = force_mode (word_mode, temp);
+ }
+ else
+ temp = gen_reg_rtx (Pmode);
+
+ /* (const (plus (symbol) (const_int))) must be forced to
+ memory during/after reload if the const_int will not fit
+ in 14 bits. */
+ if (GET_CODE (operand1) == CONST
+ && GET_CODE (XEXP (operand1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
+ && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
+ && (reload_completed || reload_in_progress)
+ && flag_pic)
+ {
+ rtx const_mem = force_const_mem (mode, operand1);
+ operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
+ mode, temp);
+ operands[1] = replace_equiv_address (const_mem, operands[1]);
+ emit_move_sequence (operands, mode, temp);
+ }
+ else
+ {
+ operands[1] = legitimize_pic_address (operand1, mode, temp);
+ if (REG_P (operand0) && REG_P (operands[1]))
+ copy_reg_pointer (operand0, operands[1]);
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
+ }
+ }
+ /* On the HPPA, references to data space are supposed to use dp,
+ register 27, but showing it in the RTL inhibits various cse
+ and loop optimizations. */
+ else
+ {
+ rtx temp, set;
+
+ if (reload_in_progress || reload_completed)
+ {
+ temp = scratch_reg ? scratch_reg : operand0;
+ /* TEMP will hold an address and maybe the actual
+ data. We want it in WORD_MODE regardless of what mode it
+ was originally given to us. */
+ temp = force_mode (word_mode, temp);
+ }
+ else
+ temp = gen_reg_rtx (mode);
+
+ /* Loading a SYMBOL_REF into a register makes that register
+ safe to be used as the base in an indexed address.
+
+ Don't mark hard registers though. That loses. */
+ if (GET_CODE (operand0) == REG
+ && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ mark_reg_pointer (operand0, BITS_PER_UNIT);
+ if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
+ mark_reg_pointer (temp, BITS_PER_UNIT);
+
+ if (ishighonly)
+ set = gen_rtx_SET (mode, operand0, temp);
+ else
+ set = gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_LO_SUM (mode, temp, operand1));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ temp,
+ gen_rtx_HIGH (mode, operand1)));
+ emit_insn (set);
+
+ }
+ return 1;
+ }
+ else if (pa_tls_referenced_p (operand1))
+ {
+ rtx tmp = operand1;
+ rtx addend = NULL;
+
+ if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (tmp, 0), 1);
+ tmp = XEXP (XEXP (tmp, 0), 0);
+ }
+
+ gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
+ tmp = legitimize_tls_address (tmp);
+ if (addend)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, addend);
+ tmp = force_operand (tmp, operands[0]);
+ }
+ operands[1] = tmp;
+ }
+ else if (GET_CODE (operand1) != CONST_INT
+ || !cint_ok_for_move (INTVAL (operand1)))
+ {
+ rtx insn, temp;
+ rtx op1 = operand1;
+ HOST_WIDE_INT value = 0;
+ HOST_WIDE_INT insv = 0;
+ int insert = 0;
+
+ if (GET_CODE (operand1) == CONST_INT)
+ value = INTVAL (operand1);
+
+ if (TARGET_64BIT
+ && GET_CODE (operand1) == CONST_INT
+ && HOST_BITS_PER_WIDE_INT > 32
+ && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
+ {
+ HOST_WIDE_INT nval;
+
+ /* Extract the low order 32 bits of the value and sign extend.
+ If the new value is the same as the original value, we can
+ use the original value as-is. If the new value is
+ different, we use it and insert the most-significant 32-bits
+ of the original value into the final result.
*/ + nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1)) + ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31); + if (value != nval) + { +#if HOST_BITS_PER_WIDE_INT > 32 + insv = value >= 0 ? value >> 32 : ~(~value >> 32); +#endif + insert = 1; + value = nval; + operand1 = GEN_INT (nval); + } + } + + if (reload_in_progress || reload_completed) + temp = scratch_reg ? scratch_reg : operand0; + else + temp = gen_reg_rtx (mode); + + /* We don't directly split DImode constants on 32-bit targets + because PLUS uses an 11-bit immediate and the insn sequence + generated is not as efficient as the one using HIGH/LO_SUM. */ + if (GET_CODE (operand1) == CONST_INT + && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD + && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT + && !insert) + { + /* Directly break constant into high and low parts. This + provides better optimization opportunities because various + passes recognize constants split with PLUS but not LO_SUM. + We use a 14-bit signed low part except when the addition + of 0x4000 to the high part might change the sign of the + high part. */ + HOST_WIDE_INT low = value & 0x3fff; + HOST_WIDE_INT high = value & ~ 0x3fff; + + if (low >= 0x2000) + { + if (high == 0x7fffc000 || (mode == HImode && high == 0x4000)) + high += 0x2000; + else + high += 0x4000; + } + + low = value - high; + + emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high))); + operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low)); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, temp, + gen_rtx_HIGH (mode, operand1))); + operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); + } + + insn = emit_move_insn (operands[0], operands[1]); + + /* Now insert the most significant 32 bits of the value + into the register. When we don't have a second register + available, it could take up to nine instructions to load + a 64-bit integer constant. Prior to reload, we force + constants that would take more than three instructions + to load to the constant pool. During and after reload, + we have to handle all possible values. */ + if (insert) + { + /* Use a HIGH/LO_SUM/INSV sequence if we have a second + register and the value to be inserted is outside the + range that can be loaded with three depdi instructions. */ + if (temp != operand0 && (insv >= 16384 || insv < -16384)) + { + operand1 = GEN_INT (insv); + + emit_insn (gen_rtx_SET (VOIDmode, temp, + gen_rtx_HIGH (mode, operand1))); + emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1)); + emit_insn (gen_insv (operand0, GEN_INT (32), + const0_rtx, temp)); + } + else + { + int len = 5, pos = 27; + + /* Insert the bits using the depdi instruction. */ + while (pos >= 0) + { + HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16; + HOST_WIDE_INT sign = v5 < 0; + + /* Left extend the insertion. */ + insv = (insv >= 0 ? insv >> len : ~(~insv >> len)); + while (pos > 0 && (insv & 1) == sign) + { + insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1)); + len += 1; + pos -= 1; + } + + emit_insn (gen_insv (operand0, GEN_INT (len), + GEN_INT (pos), GEN_INT (v5))); + + len = pos > 0 && pos < 5 ? pos : 5; + pos -= len; + } + } + } + + set_unique_reg_note (insn, REG_EQUAL, op1); + + return 1; + } + } + /* Now have insn-emit do whatever it normally does. */ + return 0; +} + +/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning + it will need a link/runtime reloc). 
*/ + +int +reloc_needed (tree exp) +{ + int reloc = 0; + + switch (TREE_CODE (exp)) + { + case ADDR_EXPR: + return 1; + + case POINTER_PLUS_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + reloc = reloc_needed (TREE_OPERAND (exp, 0)); + reloc |= reloc_needed (TREE_OPERAND (exp, 1)); + break; + + CASE_CONVERT: + case NON_LVALUE_EXPR: + reloc = reloc_needed (TREE_OPERAND (exp, 0)); + break; + + case CONSTRUCTOR: + { + tree value; + unsigned HOST_WIDE_INT ix; + + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value) + if (value) + reloc |= reloc_needed (value); + } + break; + + case ERROR_MARK: + break; + + default: + break; + } + return reloc; +} + +/* Does operand (which is a symbolic_operand) live in text space? + If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info, + will be true. */ + +int +read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (GET_CODE (operand) == CONST) + operand = XEXP (XEXP (operand, 0), 0); + if (flag_pic) + { + if (GET_CODE (operand) == SYMBOL_REF) + return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand); + } + else + { + if (GET_CODE (operand) == SYMBOL_REF) + return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand); + } + return 1; +} + + +/* Return the best assembler insn template + for moving operands[1] into operands[0] as a fullword. */ +const char * +singlemove_string (rtx *operands) +{ + HOST_WIDE_INT intval; + + if (GET_CODE (operands[0]) == MEM) + return "stw %r1,%0"; + if (GET_CODE (operands[1]) == MEM) + return "ldw %1,%0"; + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + long i; + REAL_VALUE_TYPE d; + + gcc_assert (GET_MODE (operands[1]) == SFmode); + + /* Translate the CONST_DOUBLE to a CONST_INT with the same target + bit pattern. */ + REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (d, i); + + operands[1] = GEN_INT (i); + /* Fall through to CONST_INT case. */ + } + if (GET_CODE (operands[1]) == CONST_INT) + { + intval = INTVAL (operands[1]); + + if (VAL_14_BITS_P (intval)) + return "ldi %1,%0"; + else if ((intval & 0x7ff) == 0) + return "ldil L'%1,%0"; + else if (zdepi_cint_p (intval)) + return "{zdepi %Z1,%0|depwi,z %Z1,%0}"; + else + return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; + } + return "copy %1,%0"; +} + + +/* Compute position (in OP[1]) and width (in OP[2]) + useful for copying IMM to a register using the zdepi + instructions. Store the immediate value to insert in OP[0]. */ +static void +compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) +{ + int lsb, len; + + /* Find the least significant set bit in IMM. */ + for (lsb = 0; lsb < 32; lsb++) + { + if ((imm & 1) != 0) + break; + imm >>= 1; + } + + /* Choose variants based on *sign* of the 5-bit field. */ + if ((imm & 0x10) == 0) + len = (lsb <= 28) ? 4 : 32 - lsb; + else + { + /* Find the width of the bitstring in IMM. */ + for (len = 5; len < 32 - lsb; len++) + { + if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) + break; + } + + /* Sign extend IMM as a 5-bit value. */ + imm = (imm & 0xf) - 0x10; + } + + op[0] = imm; + op[1] = 31 - lsb; + op[2] = len; +} + +/* Compute position (in OP[1]) and width (in OP[2]) + useful for copying IMM to a register using the depdi,z + instructions. Store the immediate value to insert in OP[0]. */ +void +compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) +{ + int lsb, len, maxlen; + + maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64); + + /* Find the least significant set bit in IMM. 
*/ + for (lsb = 0; lsb < maxlen; lsb++) + { + if ((imm & 1) != 0) + break; + imm >>= 1; + } + + /* Choose variants based on *sign* of the 5-bit field. */ + if ((imm & 0x10) == 0) + len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb; + else + { + /* Find the width of the bitstring in IMM. */ + for (len = 5; len < maxlen - lsb; len++) + { + if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) + break; + } + + /* Extend length if host is narrow and IMM is negative. */ + if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb) + len += 32; + + /* Sign extend IMM as a 5-bit value. */ + imm = (imm & 0xf) - 0x10; + } + + op[0] = imm; + op[1] = 63 - lsb; + op[2] = len; +} + +/* Output assembler code to perform a doubleword move insn + with operands OPERANDS. */ + +const char * +output_move_double (rtx *operands) +{ + enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; + rtx latehalf[2]; + rtx addreg0 = 0, addreg1 = 0; + + /* First classify both operands. */ + + if (REG_P (operands[0])) + optype0 = REGOP; + else if (offsettable_memref_p (operands[0])) + optype0 = OFFSOP; + else if (GET_CODE (operands[0]) == MEM) + optype0 = MEMOP; + else + optype0 = RNDOP; + + if (REG_P (operands[1])) + optype1 = REGOP; + else if (CONSTANT_P (operands[1])) + optype1 = CNSTOP; + else if (offsettable_memref_p (operands[1])) + optype1 = OFFSOP; + else if (GET_CODE (operands[1]) == MEM) + optype1 = MEMOP; + else + optype1 = RNDOP; + + /* Check for the cases that the operand constraints are not + supposed to allow to happen. */ + gcc_assert (optype0 == REGOP || optype1 == REGOP); + + /* Handle copies between general and floating registers. */ + + if (optype0 == REGOP && optype1 == REGOP + && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])) + { + if (FP_REG_P (operands[0])) + { + output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands); + output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands); + return "{fldds|fldd} -16(%%sp),%0"; + } + else + { + output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands); + output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands); + return "{ldws|ldw} -12(%%sp),%R0"; + } + } + + /* Handle auto decrementing and incrementing loads and stores + specifically, since the structure of the function doesn't work + for them without major modification. Do it better when we learn + this port about the general inc/dec addressing of PA. + (This was written by tege. Chide him if it doesn't work.) */ + + if (optype0 == MEMOP) + { + /* We have to output the address syntax ourselves, since print_operand + doesn't deal with the addresses we want to use. Fix this later. */ + + rtx addr = XEXP (operands[0], 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); + + operands[0] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[1]) == REG + && GET_CODE (operands[0]) == REG); + + gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); + + /* No overlap between high target register and address + register. 
(We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == POST_INC) + return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)"; + return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)"; + } + else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); + + operands[0] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[1]) == REG + && GET_CODE (operands[0]) == REG); + + gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); + /* No overlap between high target register and address + register. (We do this in a non-obvious way to save a + register file writeback) */ + if (GET_CODE (addr) == PRE_INC) + return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)"; + return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)"; + } + } + if (optype1 == MEMOP) + { + /* We have to output the address syntax ourselves, since print_operand + doesn't deal with the addresses we want to use. Fix this later. */ + + rtx addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + operands[1] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[0]) == REG + && GET_CODE (operands[1]) == REG); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. (We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == POST_INC) + return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0"; + return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0"; + } + else + { + /* This is an undefined situation. We should load into the + address register *and* update that register. Probably + we don't need to handle this at all. */ + if (GET_CODE (addr) == POST_INC) + return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0"; + return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0"; + } + } + else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + operands[1] = XEXP (addr, 0); + gcc_assert (GET_CODE (operands[0]) == REG + && GET_CODE (operands[1]) == REG); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. (We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == PRE_INC) + return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0"; + return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0"; + } + else + { + /* This is an undefined situation. We should load into the + address register *and* update that register. Probably + we don't need to handle this at all. 
*/ + if (GET_CODE (addr) == PRE_INC) + return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0"; + return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0"; + } + } + else if (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == MULT) + { + rtx xoperands[4]; + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + xoperands[0] = high_reg; + xoperands[1] = XEXP (addr, 1); + xoperands[2] = XEXP (XEXP (addr, 0), 0); + xoperands[3] = XEXP (XEXP (addr, 0), 1); + output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}", + xoperands); + return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; + } + else + { + xoperands[0] = high_reg; + xoperands[1] = XEXP (addr, 1); + xoperands[2] = XEXP (XEXP (addr, 0), 0); + xoperands[3] = XEXP (XEXP (addr, 0), 1); + output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}", + xoperands); + return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0"; + } + } + } + + /* If an operand is an unoffsettable memory ref, find a register + we can increment temporarily to make it refer to the second word. */ + + if (optype0 == MEMOP) + addreg0 = find_addr_reg (XEXP (operands[0], 0)); + + if (optype1 == MEMOP) + addreg1 = find_addr_reg (XEXP (operands[1], 0)); + + /* Ok, we can do one word at a time. + Normally we do the low-numbered word first. + + In either case, set up in LATEHALF the operands to use + for the high-numbered word and in some cases alter the + operands in OPERANDS to be suitable for the low-numbered word. */ + + if (optype0 == REGOP) + latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + else if (optype0 == OFFSOP) + latehalf[0] = adjust_address (operands[0], SImode, 4); + else + latehalf[0] = operands[0]; + + if (optype1 == REGOP) + latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); + else if (optype1 == OFFSOP) + latehalf[1] = adjust_address (operands[1], SImode, 4); + else if (optype1 == CNSTOP) + split_double (operands[1], &operands[1], &latehalf[1]); + else + latehalf[1] = operands[1]; + + /* If the first move would clobber the source of the second one, + do them in the other order. + + This can happen in two cases: + + mem -> register where the first half of the destination register + is the same register used in the memory's address. Reload + can create such insns. + + mem in this case will be either register indirect or register + indirect plus a valid offset. + + register -> register move where REGNO(dst) == REGNO(src + 1) + someone (Tim/Tege?) claimed this can happen for parameter loads. + + Handle mem -> register case first. */ + if (optype0 == REGOP + && (optype1 == MEMOP || optype1 == OFFSOP) + && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, + operands[1], 0)) + { + /* Do the late half first. */ + if (addreg1) + output_asm_insn ("ldo 4(%0),%0", &addreg1); + output_asm_insn (singlemove_string (latehalf), latehalf); + + /* Then clobber. */ + if (addreg1) + output_asm_insn ("ldo -4(%0),%0", &addreg1); + return singlemove_string (operands); + } + + /* Now handle register -> register case. */ + if (optype0 == REGOP && optype1 == REGOP + && REGNO (operands[0]) == REGNO (operands[1]) + 1) + { + output_asm_insn (singlemove_string (latehalf), latehalf); + return singlemove_string (operands); + } + + /* Normal case: do the two words, low-numbered first. */ + + output_asm_insn (singlemove_string (operands), operands); + + /* Make any unoffsettable addresses point at high-numbered word. 
*/ + if (addreg0) + output_asm_insn ("ldo 4(%0),%0", &addreg0); + if (addreg1) + output_asm_insn ("ldo 4(%0),%0", &addreg1); + + /* Do that word. */ + output_asm_insn (singlemove_string (latehalf), latehalf); + + /* Undo the adds we just did. */ + if (addreg0) + output_asm_insn ("ldo -4(%0),%0", &addreg0); + if (addreg1) + output_asm_insn ("ldo -4(%0),%0", &addreg1); + + return ""; +} + +const char * +output_fp_move_double (rtx *operands) +{ + if (FP_REG_P (operands[0])) + { + if (FP_REG_P (operands[1]) + || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) + output_asm_insn ("fcpy,dbl %f1,%0", operands); + else + output_asm_insn ("fldd%F1 %1,%0", operands); + } + else if (FP_REG_P (operands[1])) + { + output_asm_insn ("fstd%F0 %1,%0", operands); + } + else + { + rtx xoperands[2]; + + gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0]))); + + /* This is a pain. You have to be prepared to deal with an + arbitrary address here including pre/post increment/decrement. + + So avoid this in the MD. */ + gcc_assert (GET_CODE (operands[0]) == REG); + + xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + xoperands[0] = operands[0]; + output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); + } + return ""; +} + +/* Return a REG that occurs in ADDR with coefficient 1. + ADDR can be effectively incremented by incrementing REG. */ + +static rtx +find_addr_reg (rtx addr) +{ + while (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 0)) == REG) + addr = XEXP (addr, 0); + else if (GET_CODE (XEXP (addr, 1)) == REG) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 0))) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 1))) + addr = XEXP (addr, 0); + else + gcc_unreachable (); + } + gcc_assert (GET_CODE (addr) == REG); + return addr; +} + +/* Emit code to perform a block move. + + OPERANDS[0] is the destination pointer as a REG, clobbered. + OPERANDS[1] is the source pointer as a REG, clobbered. + OPERANDS[2] is a register for temporary storage. + OPERANDS[3] is a register for temporary storage. + OPERANDS[4] is the size as a CONST_INT + OPERANDS[5] is the alignment safe to use, as a CONST_INT. + OPERANDS[6] is another temporary register. */ + +const char * +output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) +{ + int align = INTVAL (operands[5]); + unsigned long n_bytes = INTVAL (operands[4]); + + /* We can't move more than a word at a time because the PA + has no longer integer move insns. (Could use fp mem ops?) */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* Note that we know each loop below will execute at least twice + (else we would have open-coded the copy). */ + switch (align) + { + case 8: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 16); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("ldd,ma 8(%1),%3", operands); + output_asm_insn ("ldd,ma 8(%1),%6", operands); + output_asm_insn ("std,ma %3,8(%0)", operands); + output_asm_insn ("addib,>= -16,%2,.-12", operands); + output_asm_insn ("std,ma %6,8(%0)", operands); + + /* Handle the residual. There could be up to 15 bytes of + residual to copy! 
*/ + if (n_bytes % 16 != 0) + { + operands[4] = GEN_INT (n_bytes % 8); + if (n_bytes % 16 >= 8) + output_asm_insn ("ldd,ma 8(%1),%3", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("ldd 0(%1),%6", operands); + if (n_bytes % 16 >= 8) + output_asm_insn ("std,ma %3,8(%0)", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("stdby,e %6,%4(%0)", operands); + } + return ""; + + case 4: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 8); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); + output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands); + output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); + output_asm_insn ("addib,>= -8,%2,.-12", operands); + output_asm_insn ("{stws|stw},ma %6,4(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! */ + if (n_bytes % 8 != 0) + { + operands[4] = GEN_INT (n_bytes % 4); + if (n_bytes % 8 >= 4) + output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("ldw 0(%1),%6", operands); + if (n_bytes % 8 >= 4) + output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands); + } + return ""; + + case 2: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 4); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); + output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands); + output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); + output_asm_insn ("addib,>= -4,%2,.-12", operands); + output_asm_insn ("{sths|sth},ma %6,2(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 4 != 0) + { + if (n_bytes % 4 >= 2) + output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("ldb 0(%1),%6", operands); + if (n_bytes % 4 >= 2) + output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("stb %6,0(%0)", operands); + } + return ""; + + case 1: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 2); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands); + output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands); + output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands); + output_asm_insn ("addib,>= -2,%2,.-12", operands); + output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 2 != 0) + { + output_asm_insn ("ldb 0(%1),%3", operands); + output_asm_insn ("stb %3,0(%0)", operands); + } + return ""; + + default: + gcc_unreachable (); + } +} + +/* Count the number of insns necessary to handle this block move. + + Basic structure is the same as emit_block_move, except that we + count insns rather than emit them. */ + +static int +compute_movmem_length (rtx insn) +{ + rtx pat = PATTERN (insn); + unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); + unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); + unsigned int n_insns = 0; + + /* We can't move more than four bytes at a time because the PA + has no longer integer move insns. (Could use fp mem ops?) */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* The basic copying loop. */ + n_insns = 6; + + /* Residuals. 
*/ + if (n_bytes % (2 * align) != 0) + { + if ((n_bytes % (2 * align)) >= align) + n_insns += 2; + + if ((n_bytes % align) != 0) + n_insns += 2; + } + + /* Lengths are expressed in bytes now; each insn is 4 bytes. */ + return n_insns * 4; +} + +/* Emit code to perform a block clear. + + OPERANDS[0] is the destination pointer as a REG, clobbered. + OPERANDS[1] is a register for temporary storage. + OPERANDS[2] is the size as a CONST_INT + OPERANDS[3] is the alignment safe to use, as a CONST_INT. */ + +const char * +output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) +{ + int align = INTVAL (operands[3]); + unsigned long n_bytes = INTVAL (operands[2]); + + /* We can't clear more than a word at a time because the PA + has no longer integer move insns. */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* Note that we know each loop below will execute at least twice + (else we would have open-coded the copy). */ + switch (align) + { + case 8: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 16); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("std,ma %%r0,8(%0)", operands); + output_asm_insn ("addib,>= -16,%1,.-4", operands); + output_asm_insn ("std,ma %%r0,8(%0)", operands); + + /* Handle the residual. There could be up to 15 bytes of + residual to clear! */ + if (n_bytes % 16 != 0) + { + operands[2] = GEN_INT (n_bytes % 8); + if (n_bytes % 16 >= 8) + output_asm_insn ("std,ma %%r0,8(%0)", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("stdby,e %%r0,%2(%0)", operands); + } + return ""; + + case 4: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 8); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + output_asm_insn ("addib,>= -8,%1,.-4", operands); + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to clear! */ + if (n_bytes % 8 != 0) + { + operands[2] = GEN_INT (n_bytes % 4); + if (n_bytes % 8 >= 4) + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands); + } + return ""; + + case 2: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 4); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + output_asm_insn ("addib,>= -4,%1,.-4", operands); + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 4 != 0) + { + if (n_bytes % 4 >= 2) + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("stb %%r0,0(%0)", operands); + } + return ""; + + case 1: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 2); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); + output_asm_insn ("addib,>= -2,%1,.-4", operands); + output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 2 != 0) + output_asm_insn ("stb %%r0,0(%0)", operands); + + return ""; + + default: + gcc_unreachable (); + } +} + +/* Count the number of insns necessary to handle this block clear. + + Basic structure is the same as output_block_clear, except that we + count insns rather than emit them. 
*/ + +static int +compute_clrmem_length (rtx insn) +{ + rtx pat = PATTERN (insn); + unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0)); + unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0)); + unsigned int n_insns = 0; + + /* We can't clear more than a word at a time because the PA + has no longer integer move insns. */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* The basic loop. */ + n_insns = 4; + + /* Residuals. */ + if (n_bytes % (2 * align) != 0) + { + if ((n_bytes % (2 * align)) >= align) + n_insns++; + + if ((n_bytes % align) != 0) + n_insns++; + } + + /* Lengths are expressed in bytes now; each insn is 4 bytes. */ + return n_insns * 4; +} + + +const char * +output_and (rtx *operands) +{ + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) + { + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int ls0, ls1, ms0, p, len; + + for (ls0 = 0; ls0 < 32; ls0++) + if ((mask & (1 << ls0)) == 0) + break; + + for (ls1 = ls0; ls1 < 32; ls1++) + if ((mask & (1 << ls1)) != 0) + break; + + for (ms0 = ls1; ms0 < 32; ms0++) + if ((mask & (1 << ms0)) == 0) + break; + + gcc_assert (ms0 == 32); + + if (ls1 == 32) + { + len = ls0; + + gcc_assert (len); + + operands[2] = GEN_INT (len); + return "{extru|extrw,u} %1,31,%2,%0"; + } + else + { + /* We could use this `depi' for the case above as well, but `depi' + requires one more register file access than an `extru'. */ + + p = 31 - ls0; + len = ls1 - ls0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "{depi|depwi} 0,%2,%3,%0"; + } + } + else + return "and %1,%2,%0"; +} + +/* Return a string to perform a bitwise-and of operands[1] with operands[2] + storing the result in operands[0]. */ +const char * +output_64bit_and (rtx *operands) +{ + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) + { + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int ls0, ls1, ms0, p, len; + + for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0) + break; + + for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0) + break; + + for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0) + break; + + gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT); + + if (ls1 == HOST_BITS_PER_WIDE_INT) + { + len = ls0; + + gcc_assert (len); + + operands[2] = GEN_INT (len); + return "extrd,u %1,63,%2,%0"; + } + else + { + /* We could use this `depi' for the case above as well, but `depi' + requires one more register file access than an `extru'. */ + + p = 63 - ls0; + len = ls1 - ls0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "depdi 0,%2,%3,%0"; + } + } + else + return "and %1,%2,%0"; +} + +const char * +output_ior (rtx *operands) +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int bs0, bs1, p, len; + + if (INTVAL (operands[2]) == 0) + return "copy %1,%0"; + + for (bs0 = 0; bs0 < 32; bs0++) + if ((mask & (1 << bs0)) != 0) + break; + + for (bs1 = bs0; bs1 < 32; bs1++) + if ((mask & (1 << bs1)) == 0) + break; + + gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); + + p = 31 - bs0; + len = bs1 - bs0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "{depi|depwi} -1,%2,%3,%0"; +} + +/* Return a string to perform a bitwise-or of operands[1] with operands[2] + storing the result in operands[0]. 
*/ +const char * +output_64bit_ior (rtx *operands) +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int bs0, bs1, p, len; + + if (INTVAL (operands[2]) == 0) + return "copy %1,%0"; + + for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0) + break; + + for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++) + if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0) + break; + + gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT + || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); + + p = 63 - bs0; + len = bs1 - bs0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "depdi -1,%2,%3,%0"; +} + +/* Target hook for assembling integer objects. This code handles + aligned SI and DI integers specially since function references + must be preceded by P%. */ + +static bool +pa_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + if (size == UNITS_PER_WORD + && aligned_p + && function_label_operand (x, VOIDmode)) + { + fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file); + output_addr_const (asm_out_file, x); + fputc ('\n', asm_out_file); + return true; + } + return default_assemble_integer (x, size, aligned_p); +} + +/* Output an ascii string. */ +void +output_ascii (FILE *file, const char *p, int size) +{ + int i; + int chars_output; + unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ + + /* The HP assembler can only take strings of 256 characters at one + time. This is a limitation on input line length, *not* the + length of the string. Sigh. Even worse, it seems that the + restriction is in number of input characters (see \xnn & + \whatever). So we have to do this very carefully. */ + + fputs ("\t.STRING \"", file); + + chars_output = 0; + for (i = 0; i < size; i += 4) + { + int co = 0; + int io = 0; + for (io = 0, co = 0; io < MIN (4, size - i); io++) + { + register unsigned int c = (unsigned char) p[i + io]; + + if (c == '\"' || c == '\\') + partial_output[co++] = '\\'; + if (c >= ' ' && c < 0177) + partial_output[co++] = c; + else + { + unsigned int hexd; + partial_output[co++] = '\\'; + partial_output[co++] = 'x'; + hexd = c / 16 - 0 + '0'; + if (hexd > '9') + hexd -= '9' - 'a' + 1; + partial_output[co++] = hexd; + hexd = c % 16 - 0 + '0'; + if (hexd > '9') + hexd -= '9' - 'a' + 1; + partial_output[co++] = hexd; + } + } + if (chars_output + co > 243) + { + fputs ("\"\n\t.STRING \"", file); + chars_output = 0; + } + fwrite (partial_output, 1, (size_t) co, file); + chars_output += co; + co = 0; + } + fputs ("\"\n", file); +} + +/* Try to rewrite floating point comparisons & branches to avoid + useless add,tr insns. + + CHECK_NOTES is nonzero if we should examine REG_DEAD notes + to see if FPCC is dead. CHECK_NOTES is nonzero for the + first attempt to remove useless add,tr insns. It is zero + for the second pass as reorg sometimes leaves bogus REG_DEAD + notes lying around. + + When CHECK_NOTES is zero we can only eliminate add,tr insns + when there's a 1:1 correspondence between fcmp and ftest/fbranch + instructions. */ +static void +remove_useless_addtr_insns (int check_notes) +{ + rtx insn; + static int pass = 0; + + /* This is fairly cheap, so always run it when optimizing. */ + if (optimize > 0) + { + int fcmp_count = 0; + int fbranch_count = 0; + + /* Walk all the insns in this function looking for fcmp & fbranch + instructions. Keep track of how many of each we find. 
*/ + for (insn = get_insns (); insn; insn = next_insn (insn)) + { + rtx tmp; + + /* Ignore anything that isn't an INSN or a JUMP_INSN. */ + if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN) + continue; + + tmp = PATTERN (insn); + + /* It must be a set. */ + if (GET_CODE (tmp) != SET) + continue; + + /* If the destination is CCFP, then we've found an fcmp insn. */ + tmp = SET_DEST (tmp); + if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) + { + fcmp_count++; + continue; + } + + tmp = PATTERN (insn); + /* If this is an fbranch instruction, bump the fbranch counter. */ + if (GET_CODE (tmp) == SET + && SET_DEST (tmp) == pc_rtx + && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) + { + fbranch_count++; + continue; + } + } + + + /* Find all floating point compare + branch insns. If possible, + reverse the comparison & the branch to avoid add,tr insns. */ + for (insn = get_insns (); insn; insn = next_insn (insn)) + { + rtx tmp, next; + + /* Ignore anything that isn't an INSN. */ + if (GET_CODE (insn) != INSN) + continue; + + tmp = PATTERN (insn); + + /* It must be a set. */ + if (GET_CODE (tmp) != SET) + continue; + + /* The destination must be CCFP, which is register zero. */ + tmp = SET_DEST (tmp); + if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) + continue; + + /* INSN should be a set of CCFP. + + See if the result of this insn is used in a reversed FP + conditional branch. If so, reverse our condition and + the branch. Doing so avoids useless add,tr insns. */ + next = next_insn (insn); + while (next) + { + /* Jumps, calls and labels stop our search. */ + if (GET_CODE (next) == JUMP_INSN + || GET_CODE (next) == CALL_INSN + || GET_CODE (next) == CODE_LABEL) + break; + + /* As does another fcmp insn. */ + if (GET_CODE (next) == INSN + && GET_CODE (PATTERN (next)) == SET + && GET_CODE (SET_DEST (PATTERN (next))) == REG + && REGNO (SET_DEST (PATTERN (next))) == 0) + break; + + next = next_insn (next); + } + + /* Is NEXT_INSN a branch? */ + if (next + && GET_CODE (next) == JUMP_INSN) + { + rtx pattern = PATTERN (next); + + /* If it a reversed fp conditional branch (e.g. uses add,tr) + and CCFP dies, then reverse our conditional and the branch + to avoid the add,tr. */ + if (GET_CODE (pattern) == SET + && SET_DEST (pattern) == pc_rtx + && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 + && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC + && (fcmp_count == fbranch_count + || (check_notes + && find_regno_note (next, REG_DEAD, 0)))) + { + /* Reverse the branch. */ + tmp = XEXP (SET_SRC (pattern), 1); + XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); + XEXP (SET_SRC (pattern), 2) = tmp; + INSN_CODE (next) = -1; + + /* Reverse our condition. */ + tmp = PATTERN (insn); + PUT_CODE (XEXP (tmp, 1), + (reverse_condition_maybe_unordered + (GET_CODE (XEXP (tmp, 1))))); + } + } + } + } + + pass = !pass; + +} + +/* You may have trouble believing this, but this is the 32 bit HP-PA + stack layout. Wow. 
+ + Offset Contents + + Variable arguments (optional; any number may be allocated) + + SP-(4*(N+9)) arg word N + : : + SP-56 arg word 5 + SP-52 arg word 4 + + Fixed arguments (must be allocated; may remain unused) + + SP-48 arg word 3 + SP-44 arg word 2 + SP-40 arg word 1 + SP-36 arg word 0 + + Frame Marker + + SP-32 External Data Pointer (DP) + SP-28 External sr4 + SP-24 External/stub RP (RP') + SP-20 Current RP + SP-16 Static Link + SP-12 Clean up + SP-8 Calling Stub RP (RP'') + SP-4 Previous SP + + Top of Frame + + SP-0 Stack Pointer (points to next available address) + +*/ + +/* This function saves registers as follows. Registers marked with ' are + this function's registers (as opposed to the previous function's). + If a frame_pointer isn't needed, r4 is saved as a general register; + the space for the frame pointer is still allocated, though, to keep + things simple. + + + Top of Frame + + SP (FP') Previous FP + SP + 4 Alignment filler (sigh) + SP + 8 Space for locals reserved here. + . + . + . + SP + n All call saved register used. + . + . + . + SP + o All call saved fp registers used. + . + . + . + SP + p (SP') points to next available address. + +*/ + +/* Global variables set by output_function_prologue(). */ +/* Size of frame. Need to know this to emit return insns from + leaf procedures. */ +static HOST_WIDE_INT actual_fsize, local_fsize; +static int save_fregs; + +/* Emit RTL to store REG at the memory location specified by BASE+DISP. + Handle case where DISP > 8k by using the add_high_const patterns. + + Note in DISP > 8k case, we will leave the high part of the address + in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ + +static void +store_reg (int reg, HOST_WIDE_INT disp, int base) +{ + rtx insn, dest, src, basereg; + + src = gen_rtx_REG (word_mode, reg); + basereg = gen_rtx_REG (Pmode, base); + if (VAL_14_BITS_P (disp)) + { + dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp)); + insn = emit_move_insn (dest, src); + } + else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) + { + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, delta); + insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); + if (DO_FRAME_NOTES) + { + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, tmpreg, + gen_rtx_PLUS (Pmode, basereg, delta))); + RTX_FRAME_RELATED_P (insn) = 1; + } + dest = gen_rtx_MEM (word_mode, tmpreg); + insn = emit_move_insn (dest, src); + } + else + { + rtx delta = GEN_INT (disp); + rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, high); + dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); + insn = emit_move_insn (dest, src); + if (DO_FRAME_NOTES) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, + gen_rtx_MEM (word_mode, + gen_rtx_PLUS (word_mode, + basereg, + delta)), + src)); + } + + if (DO_FRAME_NOTES) + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Emit RTL to store REG at the memory location specified by BASE and then + add MOD to BASE. MOD must be <= 8k. 
*/ + +static void +store_reg_modify (int base, int reg, HOST_WIDE_INT mod) +{ + rtx insn, basereg, srcreg, delta; + + gcc_assert (VAL_14_BITS_P (mod)); + + basereg = gen_rtx_REG (Pmode, base); + srcreg = gen_rtx_REG (word_mode, reg); + delta = GEN_INT (mod); + + insn = emit_insn (gen_post_store (basereg, srcreg, delta)); + if (DO_FRAME_NOTES) + { + RTX_FRAME_RELATED_P (insn) = 1; + + /* RTX_FRAME_RELATED_P must be set on each frame related set + in a parallel with more than one element. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1; + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + } +} + +/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case + where DISP > 8k by using the add_high_const patterns. NOTE indicates + whether to add a frame note or not. + + In the DISP > 8k case, we leave the high part of the address in %r1. + There is code in expand_hppa_{prologue,epilogue} that knows about this. */ + +static void +set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note) +{ + rtx insn; + + if (VAL_14_BITS_P (disp)) + { + insn = emit_move_insn (gen_rtx_REG (Pmode, reg), + plus_constant (gen_rtx_REG (Pmode, base), disp)); + } + else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) + { + rtx basereg = gen_rtx_REG (Pmode, base); + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, delta); + insn = emit_move_insn (gen_rtx_REG (Pmode, reg), + gen_rtx_PLUS (Pmode, tmpreg, basereg)); + if (DO_FRAME_NOTES) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, tmpreg, + gen_rtx_PLUS (Pmode, basereg, delta))); + } + else + { + rtx basereg = gen_rtx_REG (Pmode, base); + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, + gen_rtx_PLUS (Pmode, basereg, + gen_rtx_HIGH (Pmode, delta))); + insn = emit_move_insn (gen_rtx_REG (Pmode, reg), + gen_rtx_LO_SUM (Pmode, tmpreg, delta)); + } + + if (DO_FRAME_NOTES && note) + RTX_FRAME_RELATED_P (insn) = 1; +} + +HOST_WIDE_INT +compute_frame_size (HOST_WIDE_INT size, int *fregs_live) +{ + int freg_saved = 0; + int i, j; + + /* The code in hppa_expand_prologue and hppa_expand_epilogue must + be consistent with the rounding and size calculation done here. + Change them at the same time. */ + + /* We do our own stack alignment. First, round the size of the + stack locals up to a word boundary. */ + size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); + + /* Space for previous frame pointer + filler. If any frame is + allocated, we need to add in the STARTING_FRAME_OFFSET. We + waste some space here for the sake of HP compatibility. The + first slot is only used when the frame pointer is needed. */ + if (size || frame_pointer_needed) + size += STARTING_FRAME_OFFSET; + + /* If the current function calls __builtin_eh_return, then we need + to allocate stack space for registers that will hold data for + the exception handler. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i; + + for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) + continue; + size += i * UNITS_PER_WORD; + } + + /* Account for space used by the callee general register saves. */ + for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--) + if (df_regs_ever_live_p (i)) + size += UNITS_PER_WORD; + + /* Account for space used by the callee floating point register saves. 
*/ + for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) + if (df_regs_ever_live_p (i) + || (!TARGET_64BIT && df_regs_ever_live_p (i + 1))) + { + freg_saved = 1; + + /* We always save both halves of the FP register, so always + increment the frame size by 8 bytes. */ + size += 8; + } + + /* If any of the floating registers are saved, account for the + alignment needed for the floating point register save block. */ + if (freg_saved) + { + size = (size + 7) & ~7; + if (fregs_live) + *fregs_live = 1; + } + + /* The various ABIs include space for the outgoing parameters in the + size of the current function's stack frame. We don't need to align + for the outgoing arguments as their alignment is set by the final + rounding for the frame as a whole. */ + size += crtl->outgoing_args_size; + + /* Allocate space for the fixed frame marker. This space must be + allocated for any function that makes calls or allocates + stack space. */ + if (!current_function_is_leaf || size) + size += TARGET_64BIT ? 48 : 32; + + /* Finally, round to the preferred stack boundary. */ + return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1) + & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)); +} + +/* Generate the assembly code for function entry. FILE is a stdio + stream to output the code to. SIZE is an int: how many units of + temporary storage to allocate. + + Refer to the array `regs_ever_live' to determine which registers to + save; `regs_ever_live[I]' is nonzero if register number I is ever + used in the function. This function is responsible for knowing + which registers should not be saved even if used. */ + +/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block + of memory. If any fpu reg is used in the function, we allocate + such a block here, at the bottom of the frame, just in case it's needed. + + If this function is a leaf procedure, then we may choose not + to do a "save" insn. The decision about whether or not + to do this is made in regclass.c. */ + +static void +pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + /* The function's label and associated .PROC must never be + separated and must be output *after* any profiling declarations + to avoid changing spaces/subspaces within a procedure. */ + ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); + fputs ("\t.PROC\n", file); + + /* hppa_expand_prologue does the dirty work now. We just need + to output the assembler directives which denote the start + of a function. */ + fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize); + if (current_function_is_leaf) + fputs (",NO_CALLS", file); + else + fputs (",CALLS", file); + if (rp_saved) + fputs (",SAVE_RP", file); + + /* The SAVE_SP flag is used to indicate that register %r3 is stored + at the beginning of the frame and that it is used as the frame + pointer for the frame. We do this because our current frame + layout doesn't conform to that specified in the HP runtime + documentation and we need a way to indicate to programs such as + GDB where %r3 is saved. The SAVE_SP flag was chosen because it + isn't used by HP compilers but is supported by the assembler. + However, SAVE_SP is supposed to indicate that the previous stack + pointer has been saved in the frame marker. */ + if (frame_pointer_needed) + fputs (",SAVE_SP", file); + + /* Pass on information about the number of callee register saves + performed in the prologue. 
+ + The compiler is supposed to pass the highest register number + saved, the assembler then has to adjust that number before + entering it into the unwind descriptor (to account for any + caller saved registers with lower register numbers than the + first callee saved register). */ + if (gr_saved) + fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); + + if (fr_saved) + fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); + + fputs ("\n\t.ENTRY\n", file); + + remove_useless_addtr_insns (0); +} + +void +hppa_expand_prologue (void) +{ + int merge_sp_adjust_with_store = 0; + HOST_WIDE_INT size = get_frame_size (); + HOST_WIDE_INT offset; + int i; + rtx insn, tmpreg; + + gr_saved = 0; + fr_saved = 0; + save_fregs = 0; + + /* Compute total size for frame pointer, filler, locals and rounding to + the next word boundary. Similar code appears in compute_frame_size + and must be changed in tandem with this code. */ + local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); + if (local_fsize || frame_pointer_needed) + local_fsize += STARTING_FRAME_OFFSET; + + actual_fsize = compute_frame_size (size, &save_fregs); + if (flag_stack_usage) + current_function_static_stack_size = actual_fsize; + + /* Compute a few things we will use often. */ + tmpreg = gen_rtx_REG (word_mode, 1); + + /* Save RP first. The calling conventions manual states RP will + always be stored into the caller's frame at sp - 20 or sp - 16 + depending on which ABI is in use. */ + if (df_regs_ever_live_p (2) || crtl->calls_eh_return) + { + store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM); + rp_saved = true; + } + else + rp_saved = false; + + /* Allocate the local frame and set up the frame pointer if needed. */ + if (actual_fsize != 0) + { + if (frame_pointer_needed) + { + /* Copy the old frame pointer temporarily into %r1. Set up the + new stack pointer, then store away the saved old frame pointer + into the stack at sp and at the same time update the stack + pointer by actual_fsize bytes. Two versions, first + handles small (<8k) frames. The second handles large (>=8k) + frames. */ + insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx); + if (DO_FRAME_NOTES) + RTX_FRAME_RELATED_P (insn) = 1; + + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + if (DO_FRAME_NOTES) + RTX_FRAME_RELATED_P (insn) = 1; + + if (VAL_14_BITS_P (actual_fsize)) + store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize); + else + { + /* It is incorrect to store the saved frame pointer at *sp, + then increment sp (writes beyond the current stack boundary). + + So instead use stwm to store at *sp and post-increment the + stack pointer as an atomic operation. Then increment sp to + finish allocating the new frame. */ + HOST_WIDE_INT adjust1 = 8192 - 64; + HOST_WIDE_INT adjust2 = actual_fsize - adjust1; + + store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1); + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + adjust2, 1); + } + + /* We set SAVE_SP in frames that need a frame pointer. Thus, + we need to store the previous stack pointer (frame pointer) + into the frame marker on targets that use the HP unwind + library. This allows the HP unwind library to be used to + unwind GCC frames. However, we are not fully compatible + with the HP library because our frame layout differs from + that specified in the HP runtime specification. + + We don't want a frame note on this instruction as the frame + marker moves during dynamic stack allocation. 
+ + This instruction also serves as a blockage to prevent + register spills from being scheduled before the stack + pointer is raised. This is necessary as we store + registers using the frame pointer as a base register, + and the frame pointer is set before sp is raised. */ + if (TARGET_HPUX_UNWIND_LIBRARY) + { + rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, + GEN_INT (TARGET_64BIT ? -8 : -4)); + + emit_move_insn (gen_rtx_MEM (word_mode, addr), + hard_frame_pointer_rtx); + } + else + emit_insn (gen_blockage ()); + } + /* no frame pointer needed. */ + else + { + /* In some cases we can perform the first callee register save + and allocating the stack frame at the same time. If so, just + make a note of it and defer allocating the frame until saving + the callee registers. */ + if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0) + merge_sp_adjust_with_store = 1; + /* Can not optimize. Adjust the stack frame by actual_fsize + bytes. */ + else + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + actual_fsize, 1); + } + } + + /* Normal register save. + + Do not save the frame pointer in the frame_pointer_needed case. It + was done earlier. */ + if (frame_pointer_needed) + { + offset = local_fsize; + + /* Saving the EH return data registers in the frame is the simplest + way to get the frame unwind information emitted. We put them + just before the general registers. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 4; i--) + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + store_reg (i, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + gr_saved++; + } + /* Account for %r3 which is saved in a special place. */ + gr_saved++; + } + /* No frame pointer needed. */ + else + { + offset = local_fsize - actual_fsize; + + /* Saving the EH return data registers in the frame is the simplest + way to get the frame unwind information emitted. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + /* If merge_sp_adjust_with_store is nonzero, then we can + optimize the first save. */ + if (merge_sp_adjust_with_store) + { + store_reg_modify (STACK_POINTER_REGNUM, regno, -offset); + merge_sp_adjust_with_store = 0; + } + else + store_reg (regno, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 3; i--) + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + /* If merge_sp_adjust_with_store is nonzero, then we can + optimize the first GR save. */ + if (merge_sp_adjust_with_store) + { + store_reg_modify (STACK_POINTER_REGNUM, i, -offset); + merge_sp_adjust_with_store = 0; + } + else + store_reg (i, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + gr_saved++; + } + + /* If we wanted to merge the SP adjustment with a GR save, but we never + did any GR saves, then just emit the adjustment here. */ + if (merge_sp_adjust_with_store) + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + actual_fsize, 1); + } + + /* The hppa calling conventions say that %r19, the pic offset + register, is saved at sp - 32 (in this function's frame) + when generating PIC code. 
FIXME: What is the correct thing + to do for functions which make no calls and allocate no + frame? Do we need to allocate a frame, or can we just omit + the save? For now we'll just omit the save. + + We don't want a note on this insn as the frame marker can + move if there is a dynamic stack allocation. */ + if (flag_pic && actual_fsize != 0 && !TARGET_64BIT) + { + rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); + + emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); + + } + + /* Align pointer properly (doubleword boundary). */ + offset = (offset + 7) & ~7; + + /* Floating point register store. */ + if (save_fregs) + { + rtx base; + + /* First get the frame or stack pointer to the start of the FP register + save area. */ + if (frame_pointer_needed) + { + set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); + base = hard_frame_pointer_rtx; + } + else + { + set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); + base = stack_pointer_rtx; + } + + /* Now actually save the FP registers. */ + for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) + { + if (df_regs_ever_live_p (i) + || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) + { + rtx addr, insn, reg; + addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg)); + reg = gen_rtx_REG (DFmode, i); + insn = emit_move_insn (addr, reg); + if (DO_FRAME_NOTES) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (TARGET_64BIT) + { + rtx mem = gen_rtx_MEM (DFmode, + plus_constant (base, offset)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, mem, reg)); + } + else + { + rtx meml = gen_rtx_MEM (SFmode, + plus_constant (base, offset)); + rtx memr = gen_rtx_MEM (SFmode, + plus_constant (base, offset + 4)); + rtx regl = gen_rtx_REG (SFmode, i); + rtx regr = gen_rtx_REG (SFmode, i + 1); + rtx setl = gen_rtx_SET (VOIDmode, meml, regl); + rtx setr = gen_rtx_SET (VOIDmode, memr, regr); + rtvec vec; + + RTX_FRAME_RELATED_P (setl) = 1; + RTX_FRAME_RELATED_P (setr) = 1; + vec = gen_rtvec (2, setl, setr); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SEQUENCE (VOIDmode, vec)); + } + } + offset += GET_MODE_SIZE (DFmode); + fr_saved++; + } + } + } +} + +/* Emit RTL to load REG from the memory location specified by BASE+DISP. + Handle case where DISP > 8k by using the add_high_const patterns. */ + +static void +load_reg (int reg, HOST_WIDE_INT disp, int base) +{ + rtx dest = gen_rtx_REG (word_mode, reg); + rtx basereg = gen_rtx_REG (Pmode, base); + rtx src; + + if (VAL_14_BITS_P (disp)) + src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp)); + else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) + { + rtx delta = GEN_INT (disp); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, delta); + if (TARGET_DISABLE_INDEXING) + { + emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); + src = gen_rtx_MEM (word_mode, tmpreg); + } + else + src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg)); + } + else + { + rtx delta = GEN_INT (disp); + rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); + rtx tmpreg = gen_rtx_REG (Pmode, 1); + + emit_move_insn (tmpreg, high); + src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); + } + + emit_move_insn (dest, src); +} + +/* Update the total code bytes output to the text section. 
*/ + +static void +update_total_code_bytes (unsigned int nbytes) +{ + if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM) + && !IN_NAMED_SECTION_P (cfun->decl)) + { + unsigned int old_total = total_code_bytes; + + total_code_bytes += nbytes; + + /* Be prepared to handle overflows. */ + if (old_total > total_code_bytes) + total_code_bytes = UINT_MAX; + } +} + +/* This function generates the assembly code for function exit. + Args are as for output_function_prologue (). + + The function epilogue should not depend on the current stack + pointer! It should use the frame pointer only. This is mandatory + because of alloca; we also take advantage of it to omit stack + adjustments before returning. */ + +static void +pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + rtx insn = get_last_insn (); + + last_address = 0; + + /* hppa_expand_epilogue does the dirty work now. We just need + to output the assembler directives which denote the end + of a function. + + To make debuggers happy, emit a nop if the epilogue was completely + eliminated due to a volatile call as the last insn in the + current function. That way the return address (in %r2) will + always point to a valid instruction in the current function. */ + + /* Get the last real insn. */ + if (GET_CODE (insn) == NOTE) + insn = prev_real_insn (insn); + + /* If it is a sequence, then look inside. */ + if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) + insn = XVECEXP (PATTERN (insn), 0, 0); + + /* If insn is a CALL_INSN, then it must be a call to a volatile + function (otherwise there would be epilogue insns). */ + if (insn && GET_CODE (insn) == CALL_INSN) + { + fputs ("\tnop\n", file); + last_address += 4; + } + + fputs ("\t.EXIT\n\t.PROCEND\n", file); + + if (TARGET_SOM && TARGET_GAS) + { + /* We done with this subspace except possibly for some additional + debug information. Forget that we are in this subspace to ensure + that the next function is output in its own subspace. */ + in_section = NULL; + cfun->machine->in_nsubspa = 2; + } + + if (INSN_ADDRESSES_SET_P ()) + { + insn = get_last_nonnote_insn (); + last_address += INSN_ADDRESSES (INSN_UID (insn)); + if (INSN_P (insn)) + last_address += insn_default_length (insn); + last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) + & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); + } + else + last_address = UINT_MAX; + + /* Finally, update the total number of code bytes output so far. */ + update_total_code_bytes (last_address); +} + +void +hppa_expand_epilogue (void) +{ + rtx tmpreg; + HOST_WIDE_INT offset; + HOST_WIDE_INT ret_off = 0; + int i; + int merge_sp_adjust_with_load = 0; + + /* We will use this often. */ + tmpreg = gen_rtx_REG (word_mode, 1); + + /* Try to restore RP early to avoid load/use interlocks when + RP gets used in the return (bv) instruction. This appears to still + be necessary even when we schedule the prologue and epilogue. */ + if (rp_saved) + { + ret_off = TARGET_64BIT ? -16 : -20; + if (frame_pointer_needed) + { + load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM); + ret_off = 0; + } + else + { + /* No frame pointer, and stack is smaller than 8k. */ + if (VAL_14_BITS_P (ret_off - actual_fsize)) + { + load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM); + ret_off = 0; + } + } + } + + /* General register restores. */ + if (frame_pointer_needed) + { + offset = local_fsize; + + /* If the current function calls __builtin_eh_return, then we need + to restore the saved EH data registers. 
*/ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 4; i--) + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + load_reg (i, offset, HARD_FRAME_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + else + { + offset = local_fsize - actual_fsize; + + /* If the current function calls __builtin_eh_return, then we need + to restore the saved EH data registers. */ + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + unsigned int i, regno; + + for (i = 0; ; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + /* Only for the first load. + merge_sp_adjust_with_load holds the register load + with which we will merge the sp adjustment. */ + if (merge_sp_adjust_with_load == 0 + && local_fsize == 0 + && VAL_14_BITS_P (-actual_fsize)) + merge_sp_adjust_with_load = regno; + else + load_reg (regno, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + + for (i = 18; i >= 3; i--) + { + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + /* Only for the first load. + merge_sp_adjust_with_load holds the register load + with which we will merge the sp adjustment. */ + if (merge_sp_adjust_with_load == 0 + && local_fsize == 0 + && VAL_14_BITS_P (-actual_fsize)) + merge_sp_adjust_with_load = i; + else + load_reg (i, offset, STACK_POINTER_REGNUM); + offset += UNITS_PER_WORD; + } + } + } + + /* Align pointer properly (doubleword boundary). */ + offset = (offset + 7) & ~7; + + /* FP register restores. */ + if (save_fregs) + { + /* Adjust the register to index off of. */ + if (frame_pointer_needed) + set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); + else + set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); + + /* Actually do the restores now. */ + for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) + if (df_regs_ever_live_p (i) + || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) + { + rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg)); + rtx dest = gen_rtx_REG (DFmode, i); + emit_move_insn (dest, src); + } + } + + /* Emit a blockage insn here to keep these insns from being moved to + an earlier spot in the epilogue, or into the main instruction stream. + + This is necessary as we must not cut the stack back before all the + restores are finished. */ + emit_insn (gen_blockage ()); + + /* Reset stack pointer (and possibly frame pointer). The stack + pointer is initially set to fp + 64 to avoid a race condition. */ + if (frame_pointer_needed) + { + rtx delta = GEN_INT (-64); + + set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0); + emit_insn (gen_pre_load (hard_frame_pointer_rtx, + stack_pointer_rtx, delta)); + } + /* If we were deferring a callee register restore, do it now. */ + else if (merge_sp_adjust_with_load) + { + rtx delta = GEN_INT (-actual_fsize); + rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load); + + emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)); + } + else if (actual_fsize != 0) + set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, + - actual_fsize, 0); + + /* If we haven't restored %r2 yet (no frame pointer, and a stack + frame greater than 8k), do so now. 
*/ + if (ret_off != 0) + load_reg (2, ret_off, STACK_POINTER_REGNUM); + + if (DO_FRAME_NOTES && crtl->calls_eh_return) + { + rtx sa = EH_RETURN_STACKADJ_RTX; + + emit_insn (gen_blockage ()); + emit_insn (TARGET_64BIT + ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa) + : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa)); + } +} + +bool +pa_can_use_return_insn (void) +{ + if (!reload_completed) + return false; + + if (frame_pointer_needed) + return false; + + if (df_regs_ever_live_p (2)) + return false; + + if (crtl->profile) + return false; + + return compute_frame_size (get_frame_size (), 0) == 0; +} + +rtx +hppa_pic_save_rtx (void) +{ + return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM); +} + +#ifndef NO_DEFERRED_PROFILE_COUNTERS +#define NO_DEFERRED_PROFILE_COUNTERS 0 +#endif + + +/* Vector of funcdef numbers. */ +static VEC(int,heap) *funcdef_nos; + +/* Output deferred profile counters. */ +static void +output_deferred_profile_counters (void) +{ + unsigned int i; + int align, n; + + if (VEC_empty (int, funcdef_nos)) + return; + + switch_to_section (data_section); + align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); + ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); + + for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++) + { + targetm.asm_out.internal_label (asm_out_file, "LP", n); + assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); + } + + VEC_free (int, heap, funcdef_nos); +} + +void +hppa_profile_hook (int label_no) +{ + /* We use SImode for the address of the function in both 32 and + 64-bit code to avoid having to provide DImode versions of the + lcla2 and load_offset_label_address insn patterns. */ + rtx reg = gen_reg_rtx (SImode); + rtx label_rtx = gen_label_rtx (); + rtx begin_label_rtx, call_insn; + char begin_label_name[16]; + + ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, + label_no); + begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); + + if (TARGET_64BIT) + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + + emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); + + /* The address of the function is loaded into %r25 with an instruction- + relative sequence that avoids the use of relocations. The sequence + is split so that the load_offset_label_address instruction can + occupy the delay slot of the call to _mcount. */ + if (TARGET_PA_20) + emit_insn (gen_lcla2 (reg, label_rtx)); + else + emit_insn (gen_lcla1 (reg, label_rtx)); + + emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), + reg, begin_label_rtx, label_rtx)); + +#if !NO_DEFERRED_PROFILE_COUNTERS + { + rtx count_label_rtx, addr, r24; + char count_label_name[16]; + + VEC_safe_push (int, heap, funcdef_nos, label_no); + ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); + count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name)); + + addr = force_reg (Pmode, count_label_rtx); + r24 = gen_rtx_REG (Pmode, 24); + emit_move_insn (r24, addr); + + call_insn = + emit_call_insn (gen_call (gen_rtx_MEM (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + "_mcount")), + GEN_INT (TARGET_64BIT ? 24 : 12))); + + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); + } +#else + + call_insn = + emit_call_insn (gen_call (gen_rtx_MEM (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + "_mcount")), + GEN_INT (TARGET_64BIT ? 
16 : 8))); + +#endif + + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25)); + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26)); + + /* Indicate the _mcount call cannot throw, nor will it execute a + non-local goto. */ + make_reg_eh_region_note_nothrow_nononlocal (call_insn); +} + +/* Fetch the return address for the frame COUNT steps up from + the current frame, after the prologue. FRAMEADDR is the + frame pointer of the COUNT frame. + + We want to ignore any export stub remnants here. To handle this, + we examine the code at the return address, and if it is an export + stub, we return a memory rtx for the stub return address stored + at frame-24. + + The value returned is used in two different ways: + + 1. To find a function's caller. + + 2. To change the return address for a function. + + This function handles most instances of case 1; however, it will + fail if there are two levels of stubs to execute on the return + path. The only way I believe that can happen is if the return value + needs a parameter relocation, which never happens for C code. + + This function handles most instances of case 2; however, it will + fail if we did not originally have stub code on the return path + but will need stub code on the new return path. This can happen if + the caller & callee are both in the main program, but the new + return location is in a shared library. */ + +rtx +return_addr_rtx (int count, rtx frameaddr) +{ + rtx label; + rtx rp; + rtx saved_rp; + rtx ins; + + /* The instruction stream at the return address of a PA1.X export stub is: + + 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp + 0x004010a1 | stub+12: ldsid (sr0,rp),r1 + 0x00011820 | stub+16: mtsp r1,sr0 + 0xe0400002 | stub+20: be,n 0(sr0,rp) + + 0xe0400002 must be specified as -532676606 so that it won't be + rejected as an invalid immediate operand on 64-bit hosts. + + The instruction stream at the return address of a PA2.0 export stub is: + + 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp + 0xe840d002 | stub+12: bve,n (rp) + */ + + HOST_WIDE_INT insns[4]; + int i, len; + + if (count != 0) + return NULL_RTX; + + rp = get_hard_reg_initial_val (Pmode, 2); + + if (TARGET_64BIT || TARGET_NO_SPACE_REGS) + return rp; + + /* If there is no export stub then just use the value saved from + the return pointer register. */ + + saved_rp = gen_reg_rtx (Pmode); + emit_move_insn (saved_rp, rp); + + /* Get pointer to the instruction stream. We have to mask out the + privilege level from the two low order bits of the return address + pointer here so that ins will point to the start of the first + instruction that would have been executed if we returned. */ + ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR)); + label = gen_label_rtx (); + + if (TARGET_PA_20) + { + insns[0] = 0x4bc23fd1; + insns[1] = -398405630; + len = 2; + } + else + { + insns[0] = 0x4bc23fd1; + insns[1] = 0x004010a1; + insns[2] = 0x00011820; + insns[3] = -532676606; + len = 4; + } + + /* Check the instruction stream at the normal return address for the + export stub. If it is an export stub, than our return address is + really in -24[frameaddr]. */ + + for (i = 0; i < len; i++) + { + rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4)); + rtx op1 = GEN_INT (insns[i]); + emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label); + } + + /* Here we know that our return address points to an export + stub. We don't want to return the address of the export stub, + but rather the return address of the export stub. 
That return + address is stored at -24[frameaddr]. */ + + emit_move_insn (saved_rp, + gen_rtx_MEM (Pmode, + memory_address (Pmode, + plus_constant (frameaddr, + -24)))); + + emit_label (label); + + return saved_rp; +} + +void +emit_bcond_fp (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[0]); + rtx operand0 = operands[1]; + rtx operand1 = operands[2]; + rtx label = operands[3]; + + emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), + gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1))); + + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (NE, + VOIDmode, + gen_rtx_REG (CCFPmode, 0), + const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx))); + +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type attr_type; + + /* Don't adjust costs for a pa8000 chip, also do not adjust any + true dependencies as they are described with bypasses now. */ + if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0) + return cost; + + if (! recog_memoized (insn)) + return 0; + + attr_type = get_attr_type (insn); + + switch (REG_NOTE_KIND (link)) + { + case REG_DEP_ANTI: + /* Anti dependency; DEP_INSN reads a register that INSN writes some + cycles later. */ + + if (attr_type == TYPE_FPLOAD) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPALU: + case TYPE_FPMULSGL: + case TYPE_FPMULDBL: + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* A fpload can't be issued until one cycle before a + preceding arithmetic operation has finished if + the target of the fpload is any of the sources + (or destination) of the arithmetic operation. */ + return insn_default_latency (dep_insn) - 1; + + default: + return 0; + } + } + } + else if (attr_type == TYPE_FPALU) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* An ALU flop can't be issued until two cycles before a + preceding divide or sqrt operation has finished if + the target of the ALU flop is any of the sources + (or destination) of the divide or sqrt operation. */ + return insn_default_latency (dep_insn) - 2; + + default: + return 0; + } + } + } + + /* For other anti dependencies, the cost is 0. */ + return 0; + + case REG_DEP_OUTPUT: + /* Output dependency; DEP_INSN writes a register that INSN writes some + cycles later. 
*/ + if (attr_type == TYPE_FPLOAD) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPALU: + case TYPE_FPMULSGL: + case TYPE_FPMULDBL: + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* A fpload can't be issued until one cycle before a + preceding arithmetic operation has finished if + the target of the fpload is the destination of the + arithmetic operation. + + Exception: For PA7100LC, PA7200 and PA7300, the cost + is 3 cycles, unless they bundle together. We also + pay the penalty if the second insn is a fpload. */ + return insn_default_latency (dep_insn) - 1; + + default: + return 0; + } + } + } + else if (attr_type == TYPE_FPALU) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* An ALU flop can't be issued until two cycles before a + preceding divide or sqrt operation has finished if + the target of the ALU flop is also the target of + the divide or sqrt operation. */ + return insn_default_latency (dep_insn) - 2; + + default: + return 0; + } + } + } + + /* For other output dependencies, the cost is 0. */ + return 0; + + default: + gcc_unreachable (); + } +} + +/* Adjust scheduling priorities. We use this to try and keep addil + and the next use of %r1 close together. */ +static int +pa_adjust_priority (rtx insn, int priority) +{ + rtx set = single_set (insn); + rtx src, dest; + if (set) + { + src = SET_SRC (set); + dest = SET_DEST (set); + if (GET_CODE (src) == LO_SUM + && symbolic_operand (XEXP (src, 1), VOIDmode) + && ! read_only_operand (XEXP (src, 1), VOIDmode)) + priority >>= 3; + + else if (GET_CODE (src) == MEM + && GET_CODE (XEXP (src, 0)) == LO_SUM + && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode) + && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode)) + priority >>= 1; + + else if (GET_CODE (dest) == MEM + && GET_CODE (XEXP (dest, 0)) == LO_SUM + && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode) + && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)) + priority >>= 3; + } + return priority; +} + +/* The 700 can only issue a single insn at a time. + The 7XXX processors can issue two insns at a time. + The 8000 can issue 4 insns at a time. 
*/ +static int +pa_issue_rate (void) +{ + switch (pa_cpu) + { + case PROCESSOR_700: return 1; + case PROCESSOR_7100: return 2; + case PROCESSOR_7100LC: return 2; + case PROCESSOR_7200: return 2; + case PROCESSOR_7300: return 2; + case PROCESSOR_8000: return 4; + + default: + gcc_unreachable (); + } +} + + + +/* Return any length plus adjustment needed by INSN which already has + its length computed as LENGTH. Return LENGTH if no adjustment is + necessary. + + Also compute the length of an inline block move here as it is too + complicated to express as a length attribute in pa.md. */ +int +pa_adjust_insn_length (rtx insn, int length) +{ + rtx pat = PATTERN (insn); + + /* If length is negative or undefined, provide initial length. */ + if ((unsigned int) length >= INT_MAX) + { + if (GET_CODE (pat) == SEQUENCE) + insn = XVECEXP (pat, 0, 0); + + switch (get_attr_type (insn)) + { + case TYPE_MILLI: + length = attr_length_millicode_call (insn); + break; + case TYPE_CALL: + length = attr_length_call (insn, 0); + break; + case TYPE_SIBCALL: + length = attr_length_call (insn, 1); + break; + case TYPE_DYNCALL: + length = attr_length_indirect_call (insn); + break; + case TYPE_SH_FUNC_ADRS: + length = attr_length_millicode_call (insn) + 20; + break; + default: + gcc_unreachable (); + } + } + + /* Jumps inside switch tables which have unfilled delay slots need + adjustment. */ + if (GET_CODE (insn) == JUMP_INSN + && GET_CODE (pat) == PARALLEL + && get_attr_type (insn) == TYPE_BTABLE_BRANCH) + length += 4; + /* Block move pattern. */ + else if (GET_CODE (insn) == INSN + && GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM + && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) + length += compute_movmem_length (insn) - 4; + /* Block clear pattern. */ + else if (GET_CODE (insn) == INSN + && GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM + && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode) + length += compute_clrmem_length (insn) - 4; + /* Conditional branch with an unfilled delay slot. */ + else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn)) + { + /* Adjust a short backwards conditional with an unfilled delay slot. */ + if (GET_CODE (pat) == SET + && length == 4 + && JUMP_LABEL (insn) != NULL_RTX + && ! forward_branch_p (insn)) + length += 4; + else if (GET_CODE (pat) == PARALLEL + && get_attr_type (insn) == TYPE_PARALLEL_BRANCH + && length == 4) + length += 4; + /* Adjust dbra insn with short backwards conditional branch with + unfilled delay slot -- only for case where counter is in a + general register register. */ + else if (GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 1)) == SET + && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG + && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0)) + && length == 4 + && ! forward_branch_p (insn)) + length += 4; + } + return length; +} + +/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */ + +static bool +pa_print_operand_punct_valid_p (unsigned char code) +{ + if (code == '@' + || code == '#' + || code == '*' + || code == '^') + return true; + + return false; +} + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +void +print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case '#': + /* Output a 'nop' if there's nothing for the delay slot. */ + if (dbr_sequence_length () == 0) + fputs ("\n\tnop", file); + return; + case '*': + /* Output a nullification completer if there's nothing for the */ + /* delay slot or nullification is requested. */ + if (dbr_sequence_length () == 0 || + (final_sequence && + INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) + fputs (",n", file); + return; + case 'R': + /* Print out the second register name of a register pair. + I.e., R (6) => 7. */ + fputs (reg_names[REGNO (x) + 1], file); + return; + case 'r': + /* A register or zero. */ + if (x == const0_rtx + || (x == CONST0_RTX (DFmode)) + || (x == CONST0_RTX (SFmode))) + { + fputs ("%r0", file); + return; + } + else + break; + case 'f': + /* A register or zero (floating point). */ + if (x == const0_rtx + || (x == CONST0_RTX (DFmode)) + || (x == CONST0_RTX (SFmode))) + { + fputs ("%fr0", file); + return; + } + else + break; + case 'A': + { + rtx xoperands[2]; + + xoperands[0] = XEXP (XEXP (x, 0), 0); + xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0); + output_global_address (file, xoperands[1], 0); + fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]); + return; + } + + case 'C': /* Plain (C)ondition */ + case 'X': + switch (GET_CODE (x)) + { + case EQ: + fputs ("=", file); break; + case NE: + fputs ("<>", file); break; + case GT: + fputs (">", file); break; + case GE: + fputs (">=", file); break; + case GEU: + fputs (">>=", file); break; + case GTU: + fputs (">>", file); break; + case LT: + fputs ("<", file); break; + case LE: + fputs ("<=", file); break; + case LEU: + fputs ("<<=", file); break; + case LTU: + fputs ("<<", file); break; + default: + gcc_unreachable (); + } + return; + case 'N': /* Condition, (N)egated */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("<>", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs ("<=", file); break; + case GE: + fputs ("<", file); break; + case GEU: + fputs ("<<", file); break; + case GTU: + fputs ("<<=", file); break; + case LT: + fputs (">=", file); break; + case LE: + fputs (">", file); break; + case LEU: + fputs (">>", file); break; + case LTU: + fputs (">>=", file); break; + default: + gcc_unreachable (); + } + return; + /* For floating point comparisons. Note that the output + predicates are the complement of the desired mode. The + conditions for GT, GE, LT, LE and LTGT cause an invalid + operation exception if the result is unordered and this + exception is enabled in the floating-point status register. */ + case 'Y': + switch (GET_CODE (x)) + { + case EQ: + fputs ("!=", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs ("!>", file); break; + case GE: + fputs ("!>=", file); break; + case LT: + fputs ("!<", file); break; + case LE: + fputs ("!<=", file); break; + case LTGT: + fputs ("!<>", file); break; + case UNLE: + fputs ("!?<=", file); break; + case UNLT: + fputs ("!?<", file); break; + case UNGE: + fputs ("!?>=", file); break; + case UNGT: + fputs ("!?>", file); break; + case UNEQ: + fputs ("!?=", file); break; + case UNORDERED: + fputs ("!?", file); break; + case ORDERED: + fputs ("?", file); break; + default: + gcc_unreachable (); + } + return; + case 'S': /* Condition, operands are (S)wapped. 
*/ + switch (GET_CODE (x)) + { + case EQ: + fputs ("=", file); break; + case NE: + fputs ("<>", file); break; + case GT: + fputs ("<", file); break; + case GE: + fputs ("<=", file); break; + case GEU: + fputs ("<<=", file); break; + case GTU: + fputs ("<<", file); break; + case LT: + fputs (">", file); break; + case LE: + fputs (">=", file); break; + case LEU: + fputs (">>=", file); break; + case LTU: + fputs (">>", file); break; + default: + gcc_unreachable (); + } + return; + case 'B': /* Condition, (B)oth swapped and negate. */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("<>", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs (">=", file); break; + case GE: + fputs (">", file); break; + case GEU: + fputs (">>", file); break; + case GTU: + fputs (">>=", file); break; + case LT: + fputs ("<=", file); break; + case LE: + fputs ("<", file); break; + case LEU: + fputs ("<<", file); break; + case LTU: + fputs ("<<=", file); break; + default: + gcc_unreachable (); + } + return; + case 'k': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); + return; + case 'Q': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63)); + return; + case 'L': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31)); + return; + case 'O': + gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0); + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + case 'p': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63)); + return; + case 'P': + gcc_assert (GET_CODE (x) == CONST_INT); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31)); + return; + case 'I': + if (GET_CODE (x) == CONST_INT) + fputs ("i", file); + return; + case 'M': + case 'F': + switch (GET_CODE (XEXP (x, 0))) + { + case PRE_DEC: + case PRE_INC: + if (ASSEMBLER_DIALECT == 0) + fputs ("s,mb", file); + else + fputs (",mb", file); + break; + case POST_DEC: + case POST_INC: + if (ASSEMBLER_DIALECT == 0) + fputs ("s,ma", file); + else + fputs (",ma", file); + break; + case PLUS: + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) + { + if (ASSEMBLER_DIALECT == 0) + fputs ("x", file); + } + else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) + { + if (ASSEMBLER_DIALECT == 0) + fputs ("x,s", file); + else + fputs (",s", file); + } + else if (code == 'F' && ASSEMBLER_DIALECT == 0) + fputs ("s", file); + break; + default: + if (code == 'F' && ASSEMBLER_DIALECT == 0) + fputs ("s", file); + break; + } + return; + case 'G': + output_global_address (file, x, 0); + return; + case 'H': + output_global_address (file, x, 1); + return; + case 0: /* Don't do anything special */ + break; + case 'Z': + { + unsigned op[3]; + compute_zdepwi_operands (INTVAL (x), op); + fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); + return; + } + case 'z': + { + unsigned op[3]; + compute_zdepdi_operands (INTVAL (x), op); + fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); + return; + } + case 'c': + /* We can get here from a .vtable_inherit due to our + CONSTANT_ADDRESS_P rejecting perfectly good constant + addresses. 
*/ + break; + default: + gcc_unreachable (); + } + if (GET_CODE (x) == REG) + { + fputs (reg_names [REGNO (x)], file); + if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4) + { + fputs ("R", file); + return; + } + if (FP_REG_P (x) + && GET_MODE_SIZE (GET_MODE (x)) <= 4 + && (REGNO (x) & 1) == 0) + fputs ("L", file); + } + else if (GET_CODE (x) == MEM) + { + int size = GET_MODE_SIZE (GET_MODE (x)); + rtx base = NULL_RTX; + switch (GET_CODE (XEXP (x, 0))) + { + case PRE_DEC: + case POST_DEC: + base = XEXP (XEXP (x, 0), 0); + fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]); + break; + case PRE_INC: + case POST_INC: + base = XEXP (XEXP (x, 0), 0); + fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]); + break; + case PLUS: + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) + fprintf (file, "%s(%s)", + reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], + reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); + else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) + fprintf (file, "%s(%s)", + reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], + reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) + { + /* Because the REG_POINTER flag can get lost during reload, + GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the + index and base registers in the combined move patterns. */ + rtx base = XEXP (XEXP (x, 0), 1); + rtx index = XEXP (XEXP (x, 0), 0); + + fprintf (file, "%s(%s)", + reg_names [REGNO (index)], reg_names [REGNO (base)]); + } + else + output_address (XEXP (x, 0)); + break; + default: + output_address (XEXP (x, 0)); + break; + } + } + else + output_addr_const (file, x); +} + +/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ + +void +output_global_address (FILE *file, rtx x, int round_constant) +{ + + /* Imagine (high (const (plus ...))). */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode)) + output_addr_const (file, x); + else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) + { + output_addr_const (file, x); + fputs ("-$global$", file); + } + else if (GET_CODE (x) == CONST) + { + const char *sep = ""; + int offset = 0; /* assembler wants -$global$ at end */ + rtx base = NULL_RTX; + + switch (GET_CODE (XEXP (XEXP (x, 0), 0))) + { + case SYMBOL_REF: + base = XEXP (XEXP (x, 0), 0); + output_addr_const (file, base); + break; + case CONST_INT: + offset = INTVAL (XEXP (XEXP (x, 0), 0)); + break; + default: + gcc_unreachable (); + } + + switch (GET_CODE (XEXP (XEXP (x, 0), 1))) + { + case SYMBOL_REF: + base = XEXP (XEXP (x, 0), 1); + output_addr_const (file, base); + break; + case CONST_INT: + offset = INTVAL (XEXP (XEXP (x, 0), 1)); + break; + default: + gcc_unreachable (); + } + + /* How bogus. The compiler is apparently responsible for + rounding the constant if it uses an LR field selector. + + The linker and/or assembler seem a better place since + they have to do this kind of thing already. + + If we fail to do this, HP's optimizing linker may eliminate + an addil, but not update the ldw/stw/ldo instruction that + uses the result of the addil. 
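   As a worked example of the rounding below, an offset of 0x2345 becomes

        (0x2345 + 0x1000) & ~0x1fff  ==  0x2000

   i.e. the nearest multiple of 0x2000, leaving a residue of 0x345 that is
   well within reach of the 14-bit displacement in the dependent
   ldw/stw/ldo.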
*/ + if (round_constant) + offset = ((offset + 0x1000) & ~0x1fff); + + switch (GET_CODE (XEXP (x, 0))) + { + case PLUS: + if (offset < 0) + { + offset = -offset; + sep = "-"; + } + else + sep = "+"; + break; + + case MINUS: + gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF); + sep = "-"; + break; + + default: + gcc_unreachable (); + } + + if (!read_only_operand (base, VOIDmode) && !flag_pic) + fputs ("-$global$", file); + if (offset) + fprintf (file, "%s%d", sep, offset); + } + else + output_addr_const (file, x); +} + +/* Output boilerplate text to appear at the beginning of the file. + There are several possible versions. */ +#define aputs(x) fputs(x, asm_out_file) +static inline void +pa_file_start_level (void) +{ + if (TARGET_64BIT) + aputs ("\t.LEVEL 2.0w\n"); + else if (TARGET_PA_20) + aputs ("\t.LEVEL 2.0\n"); + else if (TARGET_PA_11) + aputs ("\t.LEVEL 1.1\n"); + else + aputs ("\t.LEVEL 1.0\n"); +} + +static inline void +pa_file_start_space (int sortspace) +{ + aputs ("\t.SPACE $PRIVATE$"); + if (sortspace) + aputs (",SORT=16"); + aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31" + "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82" + "\n\t.SPACE $TEXT$"); + if (sortspace) + aputs (",SORT=8"); + aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44" + "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n"); +} + +static inline void +pa_file_start_file (int want_version) +{ + if (write_symbols != NO_DEBUG) + { + output_file_directive (asm_out_file, main_input_filename); + if (want_version) + aputs ("\t.version\t\"01.01\"\n"); + } +} + +static inline void +pa_file_start_mcount (const char *aswhat) +{ + if (profile_flag) + fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat); +} + +static void +pa_elf_file_start (void) +{ + pa_file_start_level (); + pa_file_start_mcount ("ENTRY"); + pa_file_start_file (0); +} + +static void +pa_som_file_start (void) +{ + pa_file_start_level (); + pa_file_start_space (0); + aputs ("\t.IMPORT $global$,DATA\n" + "\t.IMPORT $$dyncall,MILLICODE\n"); + pa_file_start_mcount ("CODE"); + pa_file_start_file (0); +} + +static void +pa_linux_file_start (void) +{ + pa_file_start_file (1); + pa_file_start_level (); + pa_file_start_mcount ("CODE"); +} + +static void +pa_hpux64_gas_file_start (void) +{ + pa_file_start_level (); +#ifdef ASM_OUTPUT_TYPE_DIRECTIVE + if (profile_flag) + ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function"); +#endif + pa_file_start_file (1); +} + +static void +pa_hpux64_hpas_file_start (void) +{ + pa_file_start_level (); + pa_file_start_space (1); + pa_file_start_mcount ("CODE"); + pa_file_start_file (0); +} +#undef aputs + +/* Search the deferred plabel list for SYMBOL and return its internal + label. If an entry for SYMBOL is not found, a new entry is created. */ + +rtx +get_deferred_plabel (rtx symbol) +{ + const char *fname = XSTR (symbol, 0); + size_t i; + + /* See if we have already put this function on the list of deferred + plabels. This list is generally small, so a liner search is not + too ugly. If it proves too slow replace it with something faster. */ + for (i = 0; i < n_deferred_plabels; i++) + if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0) + break; + + /* If the deferred plabel list is empty, or this entry was not found + on the list, create a new entry on the list. 
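   For example (the label number here is hypothetical), once an entry for
   `foo' exists, output_deferred_plabels below eventually emits a
   pointer-sized word in the data (or read-only data) section along the
   lines of

        L$0042:
                .word   foo

   and the long-call sequences pick up the address of foo from that word
   at run time rather than referencing the symbol directly.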
*/ + if (deferred_plabels == NULL || i == n_deferred_plabels) + { + tree id; + + if (deferred_plabels == 0) + deferred_plabels = ggc_alloc_deferred_plabel (); + else + deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel, + deferred_plabels, + n_deferred_plabels + 1); + + i = n_deferred_plabels++; + deferred_plabels[i].internal_label = gen_label_rtx (); + deferred_plabels[i].symbol = symbol; + + /* Gross. We have just implicitly taken the address of this + function. Mark it in the same manner as assemble_name. */ + id = maybe_get_identifier (targetm.strip_name_encoding (fname)); + if (id) + mark_referenced (id); + } + + return deferred_plabels[i].internal_label; +} + +static void +output_deferred_plabels (void) +{ + size_t i; + + /* If we have some deferred plabels, then we need to switch into the + data or readonly data section, and align it to a 4 byte boundary + before outputting the deferred plabels. */ + if (n_deferred_plabels) + { + switch_to_section (flag_pic ? data_section : readonly_data_section); + ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + } + + /* Now output the deferred plabels. */ + for (i = 0; i < n_deferred_plabels; i++) + { + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); + assemble_integer (deferred_plabels[i].symbol, + TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1); + } +} + +#if HPUX_LONG_DOUBLE_LIBRARY +/* Initialize optabs to point to HPUX long double emulation routines. */ +static void +pa_hpux_init_libfuncs (void) +{ + set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); + set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); + set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); + set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); + set_optab_libfunc (smin_optab, TFmode, "_U_Qmin"); + set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); + set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt"); + set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); + set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); + + set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); + set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); + set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); + set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); + set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); + set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); + set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord"); + + set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); + set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); + set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); + set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); + + set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT + ? "__U_Qfcnvfxt_quad_to_sgl" + : "_U_Qfcnvfxt_quad_to_sgl"); + set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); + set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl"); + set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl"); + + set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); + set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); + set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad"); + set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad"); +} +#endif + +/* HP's millicode routines mean something special to the assembler. + Keep track of which ones we have used. 
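   For example, the first multiply that reaches output_mul_insn below makes
   import_milli (mulI) splice the routine name into the template and print

        .IMPORT $$mulI,MILLICODE

   exactly once per assembly file; later uses find imported[mulI] already
   set and print nothing.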
*/ + +enum millicodes { remI, remU, divI, divU, mulI, end1000 }; +static void import_milli (enum millicodes); +static char imported[(int) end1000]; +static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"}; +static const char import_string[] = ".IMPORT $$....,MILLICODE"; +#define MILLI_START 10 + +static void +import_milli (enum millicodes code) +{ + char str[sizeof (import_string)]; + + if (!imported[(int) code]) + { + imported[(int) code] = 1; + strcpy (str, import_string); + strncpy (str + MILLI_START, milli_names[(int) code], 4); + output_asm_insn (str, 0); + } +} + +/* The register constraints have put the operands and return value in + the proper registers. */ + +const char * +output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn) +{ + import_milli (mulI); + return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI")); +} + +/* Emit the rtl for doing a division by a constant. */ + +/* Do magic division millicodes exist for this value? */ +const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1}; + +/* We'll use an array to keep track of the magic millicodes and + whether or not we've used them already. [n][0] is signed, [n][1] is + unsigned. */ + +static int div_milli[16][2]; + +int +emit_hpdiv_const (rtx *operands, int unsignedp) +{ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) < 16 + && magic_milli[INTVAL (operands[2])]) + { + rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31); + + emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); + emit + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29), + gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, + SImode, + gen_rtx_REG (SImode, 26), + operands[2])), + gen_rtx_CLOBBER (VOIDmode, operands[4]), + gen_rtx_CLOBBER (VOIDmode, operands[3]), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)), + gen_rtx_CLOBBER (VOIDmode, ret)))); + emit_move_insn (operands[0], gen_rtx_REG (SImode, 29)); + return 1; + } + return 0; +} + +const char * +output_div_insn (rtx *operands, int unsignedp, rtx insn) +{ + int divisor; + + /* If the divisor is a constant, try to use one of the special + opcodes .*/ + if (GET_CODE (operands[0]) == CONST_INT) + { + static char buf[100]; + divisor = INTVAL (operands[0]); + if (!div_milli[divisor][unsignedp]) + { + div_milli[divisor][unsignedp] = 1; + if (unsignedp) + output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands); + else + output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands); + } + if (unsignedp) + { + sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC, + INTVAL (operands[0])); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, buf)); + } + else + { + sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC, + INTVAL (operands[0])); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, buf)); + } + } + /* Divisor isn't a special constant. */ + else + { + if (unsignedp) + { + import_milli (divU); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$divU")); + } + else + { + import_milli (divI); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$divI")); + } + } +} + +/* Output a $$rem millicode to do mod. 
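   As a rough sketch of the millicode convention used here (register usage
   taken from emit_hpdiv_const above; the copies around the call are purely
   illustrative), the first operand goes in %r26, the second in %r25, the
   result comes back in %r29 and the return address lands in %r31 (%r2 on
   the 64-bit runtime), so a 32-bit signed modulo comes out roughly as

        copy    %r4,%r26
        copy    %r5,%r25
        bl      $$remI,%r31
        nop
        copy    %r29,%r3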
*/ + +const char * +output_mod_insn (int unsignedp, rtx insn) +{ + if (unsignedp) + { + import_milli (remU); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$remU")); + } + else + { + import_milli (remI); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$remI")); + } +} + +void +output_arg_descriptor (rtx call_insn) +{ + const char *arg_regs[4]; + enum machine_mode arg_mode; + rtx link; + int i, output_flag = 0; + int regno; + + /* We neither need nor want argument location descriptors for the + 64bit runtime environment or the ELF32 environment. */ + if (TARGET_64BIT || TARGET_ELF32) + return; + + for (i = 0; i < 4; i++) + arg_regs[i] = 0; + + /* Specify explicitly that no argument relocations should take place + if using the portable runtime calling conventions. */ + if (TARGET_PORTABLE_RUNTIME) + { + fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", + asm_out_file); + return; + } + + gcc_assert (GET_CODE (call_insn) == CALL_INSN); + for (link = CALL_INSN_FUNCTION_USAGE (call_insn); + link; link = XEXP (link, 1)) + { + rtx use = XEXP (link, 0); + + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + if (regno >= 23 && regno <= 26) + { + arg_regs[26 - regno] = "GR"; + if (arg_mode == DImode) + arg_regs[25 - regno] = "GR"; + } + else if (regno >= 32 && regno <= 39) + { + if (arg_mode == SFmode) + arg_regs[(regno - 32) / 2] = "FR"; + else + { +#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED + arg_regs[(regno - 34) / 2] = "FR"; + arg_regs[(regno - 34) / 2 + 1] = "FU"; +#else + arg_regs[(regno - 34) / 2] = "FU"; + arg_regs[(regno - 34) / 2 + 1] = "FR"; +#endif + } + } + } + fputs ("\t.CALL ", asm_out_file); + for (i = 0; i < 4; i++) + { + if (arg_regs[i]) + { + if (output_flag++) + fputc (',', asm_out_file); + fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); + } + } + fputc ('\n', asm_out_file); +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. */ + +static reg_class_t +pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + int regno; + enum reg_class rclass = (enum reg_class) rclass_i; + + /* Handle the easy stuff first. */ + if (rclass == R1_REGS) + return NO_REGS; + + if (REG_P (x)) + { + regno = REGNO (x); + if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER) + return NO_REGS; + } + else + regno = -1; + + /* If we have something like (mem (mem (...)), we can safely assume the + inner MEM will end up in a general register after reloading, so there's + no need for a secondary reload. */ + if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM) + return NO_REGS; + + /* Trying to load a constant into a FP register during PIC code + generation requires %r1 as a scratch register. */ + if (flag_pic + && (mode == SImode || mode == DImode) + && FP_REG_CLASS_P (rclass) + && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)) + { + sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1 + : CODE_FOR_reload_indi_r1); + return NO_REGS; + } + + /* Secondary reloads of symbolic operands require %r1 as a scratch + register when we're generating PIC code and when the operand isn't + readonly. 
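   For example (the variable name is hypothetical), reloading the address of
   a non-readonly global `x' under -fPIC is routed through the
   reload_insi_r1/reload_indi_r1 patterns requested below, which are free to
   use %r1 as the scratch while the PIC address is formed.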
*/ + if (symbolic_expression_p (x)) + { + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + if (flag_pic || !read_only_operand (x, VOIDmode)) + { + gcc_assert (mode == SImode || mode == DImode); + sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1 + : CODE_FOR_reload_indi_r1); + return NO_REGS; + } + } + + /* Profiling showed the PA port spends about 1.3% of its compilation + time in true_regnum from calls inside pa_secondary_reload_class. */ + if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) + regno = true_regnum (x); + + /* In order to allow 14-bit displacements in integer loads and stores, + we need to prevent reload from generating out of range integer mode + loads and stores to the floating point registers. Previously, we + used to call for a secondary reload and have emit_move_sequence() + fix the instruction sequence. However, reload occasionally wouldn't + generate the reload and we would end up with an invalid REG+D memory + address. So, now we use an intermediate general register for most + memory loads and stores. */ + if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1) + && GET_MODE_CLASS (mode) == MODE_INT + && FP_REG_CLASS_P (rclass)) + { + /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check + the secondary reload needed for a pseudo. It never passes a + REG+D address. */ + if (GET_CODE (x) == MEM) + { + x = XEXP (x, 0); + + /* We don't need an intermediate for indexed and LO_SUM DLT + memory addresses. When INT14_OK_STRICT is true, it might + appear that we could directly allow register indirect + memory addresses. However, this doesn't work because we + don't support SUBREGs in floating-point register copies + and reload doesn't tell us when it's going to use a SUBREG. */ + if (IS_INDEX_ADDR_P (x) + || IS_LO_SUM_DLT_ADDR_P (x)) + return NO_REGS; + + /* Otherwise, we need an intermediate general register. */ + return GENERAL_REGS; + } + + /* Request a secondary reload with a general scratch register + for everthing else. ??? Could symbolic operands be handled + directly when generating non-pic PA 2.0 code? */ + sri->icode = (in_p + ? direct_optab_handler (reload_in_optab, mode) + : direct_optab_handler (reload_out_optab, mode)); + return NO_REGS; + } + + /* A SAR<->FP register copy requires an intermediate general register + and secondary memory. We need a secondary reload with a general + scratch register for spills. */ + if (rclass == SHIFT_REGS) + { + /* Handle spill. */ + if (regno >= FIRST_PSEUDO_REGISTER || regno < 0) + { + sri->icode = (in_p + ? direct_optab_handler (reload_in_optab, mode) + : direct_optab_handler (reload_out_optab, mode)); + return NO_REGS; + } + + /* Handle FP copy. */ + if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno))) + return GENERAL_REGS; + } + + if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER + && REGNO_REG_CLASS (regno) == SHIFT_REGS + && FP_REG_CLASS_P (rclass)) + return GENERAL_REGS; + + return NO_REGS; +} + +/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer + is only marked as live on entry by df-scan when it is a fixed + register. It isn't a fixed register in the 64-bit runtime, + so we need to mark it here. */ + +static void +pa_extra_live_on_entry (bitmap regs) +{ + if (TARGET_64BIT) + bitmap_set_bit (regs, ARG_POINTER_REGNUM); +} + +/* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile + to prevent it from being deleted. */ + +rtx +pa_eh_return_handler_rtx (void) +{ + rtx tmp; + + tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx, + TARGET_64BIT ? 
GEN_INT (-16) : GEN_INT (-20)); + tmp = gen_rtx_MEM (word_mode, tmp); + tmp->volatil = 1; + return tmp; +} + +/* In the 32-bit runtime, arguments larger than eight bytes are passed + by invisible reference. As a GCC extension, we also pass anything + with a zero or variable size by reference. + + The 64-bit runtime does not describe passing any types by invisible + reference. The internals of GCC can't currently handle passing + empty structures, and zero or variable length arrays when they are + not passed entirely on the stack or by reference. Thus, as a GCC + extension, we pass these types by reference. The HP compiler doesn't + support these types, so hopefully there shouldn't be any compatibility + issues. This may have to be revisited when HP releases a C99 compiler + or updates the ABI. */ + +static bool +pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size; + + if (type) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + if (TARGET_64BIT) + return size <= 0; + else + return size <= 0 || size > 8; +} + +enum direction +function_arg_padding (enum machine_mode mode, const_tree type) +{ + if (mode == BLKmode + || (TARGET_64BIT + && type + && (AGGREGATE_TYPE_P (type) + || TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE))) + { + /* Return none if justification is not required. */ + if (type + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0) + return none; + + /* The directions set here are ignored when a BLKmode argument larger + than a word is placed in a register. Different code is used for + the stack and registers. This makes it difficult to have a + consistent data representation for both the stack and registers. + For both runtimes, the justification and padding for arguments on + the stack and in registers should be identical. */ + if (TARGET_64BIT) + /* The 64-bit runtime specifies left justification for aggregates. */ + return upward; + else + /* The 32-bit runtime architecture specifies right justification. + When the argument is passed on the stack, the argument is padded + with garbage on the left. The HP compiler pads with zeros. */ + return downward; + } + + if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY) + return downward; + else + return none; +} + + +/* Do what is necessary for `va_start'. We look at the current function + to determine if stdargs or varargs is used and fill in an initial + va_list. A pointer to this constructor is returned. */ + +static rtx +hppa_builtin_saveregs (void) +{ + rtx offset, dest; + tree fntype = TREE_TYPE (current_function_decl); + int argadj = ((!stdarg_p (fntype)) + ? UNITS_PER_WORD : 0); + + if (argadj) + offset = plus_constant (crtl->args.arg_offset_rtx, argadj); + else + offset = crtl->args.arg_offset_rtx; + + if (TARGET_64BIT) + { + int i, off; + + /* Adjust for varargs/stdarg differences. */ + if (argadj) + offset = plus_constant (crtl->args.arg_offset_rtx, -argadj); + else + offset = crtl->args.arg_offset_rtx; + + /* We need to save %r26 .. %r19 inclusive starting at offset -64 + from the incoming arg pointer and growing to larger addresses. */ + for (i = 26, off = -64; i >= 19; i--, off += 8) + emit_move_insn (gen_rtx_MEM (word_mode, + plus_constant (arg_pointer_rtx, off)), + gen_rtx_REG (word_mode, i)); + + /* The incoming args pointer points just beyond the flushback area; + normally this is not a serious concern. 
However, when we are doing + varargs/stdargs we want to make the arg pointer point to the start + of the incoming argument area. */ + emit_move_insn (virtual_incoming_args_rtx, + plus_constant (arg_pointer_rtx, -64)); + + /* Now return a pointer to the first anonymous argument. */ + return copy_to_reg (expand_binop (Pmode, add_optab, + virtual_incoming_args_rtx, + offset, 0, 0, OPTAB_LIB_WIDEN)); + } + + /* Store general registers on the stack. */ + dest = gen_rtx_MEM (BLKmode, + plus_constant (crtl->args.internal_arg_pointer, + -16)); + set_mem_alias_set (dest, get_varargs_alias_set ()); + set_mem_align (dest, BITS_PER_WORD); + move_block_from_reg (23, dest, 4); + + /* move_block_from_reg will emit code to store the argument registers + individually as scalar stores. + + However, other insns may later load from the same addresses for + a structure load (passing a struct to a varargs routine). + + The alias code assumes that such aliasing can never happen, so we + have to keep memory referencing insns from moving up beyond the + last argument register store. So we emit a blockage insn here. */ + emit_insn (gen_blockage ()); + + return copy_to_reg (expand_binop (Pmode, add_optab, + crtl->args.internal_arg_pointer, + offset, 0, 0, OPTAB_LIB_WIDEN)); +} + +static void +hppa_va_start (tree valist, rtx nextarg) +{ + nextarg = expand_builtin_saveregs (); + std_expand_builtin_va_start (valist, nextarg); +} + +static tree +hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + if (TARGET_64BIT) + { + /* Args grow upward. We can use the generic routines. */ + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + } + else /* !TARGET_64BIT */ + { + tree ptr = build_pointer_type (type); + tree valist_type; + tree t, u; + unsigned int size, ofs; + bool indirect; + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); + if (indirect) + { + type = ptr; + ptr = build_pointer_type (type); + } + size = int_size_in_bytes (type); + valist_type = TREE_TYPE (valist); + + /* Args grow down. Not handled by generic routines. */ + + u = fold_convert (sizetype, size_in_bytes (type)); + u = fold_build1 (NEGATE_EXPR, sizetype, u); + t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u); + + /* Align to 4 or 8 byte boundary depending on argument size. */ + + u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u); + t = fold_convert (valist_type, t); + + t = build2 (MODIFY_EXPR, valist_type, valist, t); + + ofs = (8 - size) % 4; + if (ofs != 0) + { + u = size_int (ofs); + t = build2 (POINTER_PLUS_EXPR, valist_type, t, u); + } + + t = fold_convert (ptr, t); + t = build_va_arg_indirect_ref (t); + + if (indirect) + t = build_va_arg_indirect_ref (t); + + return t; + } +} + +/* True if MODE is valid for the target. By "valid", we mean able to + be manipulated in non-trivial ways. In particular, this means all + the arithmetic is supported. + + Currently, TImode is not valid as the HP 64-bit runtime documentation + doesn't document the alignment and calling conventions for this type. + Thus, we return false when PRECISION is 2 * BITS_PER_WORD and + 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. 
*/ + +static bool +pa_scalar_mode_supported_p (enum machine_mode mode) +{ + int precision = GET_MODE_PRECISION (mode); + + switch (GET_MODE_CLASS (mode)) + { + case MODE_PARTIAL_INT: + case MODE_INT: + if (precision == CHAR_TYPE_SIZE) + return true; + if (precision == SHORT_TYPE_SIZE) + return true; + if (precision == INT_TYPE_SIZE) + return true; + if (precision == LONG_TYPE_SIZE) + return true; + if (precision == LONG_LONG_TYPE_SIZE) + return true; + return false; + + case MODE_FLOAT: + if (precision == FLOAT_TYPE_SIZE) + return true; + if (precision == DOUBLE_TYPE_SIZE) + return true; + if (precision == LONG_DOUBLE_TYPE_SIZE) + return true; + return false; + + case MODE_DECIMAL_FLOAT: + return false; + + default: + gcc_unreachable (); + } +} + +/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and + it branches into the delay slot. Otherwise, return FALSE. */ + +static bool +branch_to_delay_slot_p (rtx insn) +{ + rtx jump_insn; + + if (dbr_sequence_length ()) + return FALSE; + + jump_insn = next_active_insn (JUMP_LABEL (insn)); + while (insn) + { + insn = next_active_insn (insn); + if (jump_insn == insn) + return TRUE; + + /* We can't rely on the length of asms. So, we return FALSE when + the branch is followed by an asm. */ + if (!insn + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || extract_asm_operands (PATTERN (insn)) != NULL_RTX + || get_attr_length (insn) > 0) + break; + } + + return FALSE; +} + +/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot. + + This occurs when INSN has an unfilled delay slot and is followed + by an asm. Disaster can occur if the asm is empty and the jump + branches into the delay slot. So, we add a nop in the delay slot + when this occurs. */ + +static bool +branch_needs_nop_p (rtx insn) +{ + rtx jump_insn; + + if (dbr_sequence_length ()) + return FALSE; + + jump_insn = next_active_insn (JUMP_LABEL (insn)); + while (insn) + { + insn = next_active_insn (insn); + if (!insn || jump_insn == insn) + return TRUE; + + if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT + || extract_asm_operands (PATTERN (insn)) != NULL_RTX) + && get_attr_length (insn) > 0) + break; + } + + return FALSE; +} + +/* Return TRUE if INSN, a forward jump insn, can use nullification + to skip the following instruction. This avoids an extra cycle due + to a mis-predicted branch when we fall through. */ + +static bool +use_skip_p (rtx insn) +{ + rtx jump_insn = next_active_insn (JUMP_LABEL (insn)); + + while (insn) + { + insn = next_active_insn (insn); + + /* We can't rely on the length of asms, so we can't skip asms. */ + if (!insn + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || extract_asm_operands (PATTERN (insn)) != NULL_RTX) + break; + if (get_attr_length (insn) == 4 + && jump_insn == next_active_insn (insn)) + return TRUE; + if (get_attr_length (insn) > 0) + break; + } + + return FALSE; +} + +/* This routine handles all the normal conditional branch sequences we + might need to generate. It handles compare immediate vs compare + register, nullification of delay slots, varying length branches, + negated branches, and all combinations of the above. It returns the + output appropriate to emit the branch corresponding to all given + parameters. */ + +const char * +output_cbranch (rtx *operands, int negated, rtx insn) +{ + static char buf[100]; + bool useskip; + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + int xdelay; + + /* A conditional branch to the following instruction (e.g. 
the delay slot) + is asking for a disaster. This can happen when not optimizing and + when jump optimization fails. + + While it is usually safe to emit nothing, this can fail if the + preceding instruction is a nullified branch with an empty delay + slot and the same branch target as this branch. We could check + for this but jump optimization should eliminate nop jumps. It + is always safe to emit a nop. */ + if (branch_to_delay_slot_p (insn)) + return "nop"; + + /* The doubleword form of the cmpib instruction doesn't have the LEU + and GTU conditions while the cmpb instruction does. Since we accept + zero for cmpb, we must ensure that we use cmpb for the comparison. */ + if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx) + operands[2] = gen_rtx_REG (DImode, 0); + if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx) + operands[1] = gen_rtx_REG (DImode, 0); + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with a + comclr instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; + + switch (length) + { + /* All short conditional branches except backwards with an unfilled + delay slot. */ + case 4: + if (useskip) + strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); + else + strcpy (buf, "{com%I2b,|cmp%I2b,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%B3"); + else + strcat (buf, "%S3"); + if (useskip) + strcat (buf, " %2,%r1,%%r0"); + else if (nullify) + { + if (branch_needs_nop_p (insn)) + strcat (buf, ",n %2,%r1,%0%#"); + else + strcat (buf, ",n %2,%r1,%0"); + } + else + strcat (buf, " %2,%r1,%0"); + break; + + /* All long conditionals. Note a short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "{com%I2b,|cmp%I2b,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%S3"); + else + strcat (buf, "%B3"); + strcat (buf, ",n %2,%r1,.+12\n\tb %0"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a comb;nop rather than comiclr;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! 
forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + { + strcpy (buf, "{com%I2b,|cmp%I2b,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%B3 %2,%r1,%0%#"); + else + strcat (buf, "%S3 %2,%r1,%0%#"); + } + else + { + strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); + if (GET_MODE (operands[1]) == DImode) + strcat (buf, "*"); + if (negated) + strcat (buf, "%S3"); + else + strcat (buf, "%B3"); + if (nullify) + strcat (buf, " %2,%r1,%%r0\n\tb,n %0"); + else + strcat (buf, " %2,%r1,%%r0\n\tb %0"); + } + break; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. */ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + /* Create a reversed conditional branch which branches around + the following insns. */ + if (GET_MODE (operands[1]) != DImode) + { + if (nullify) + { + if (negated) + strcpy (buf, + "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}"); + } + else + { + if (negated) + strcpy (buf, + "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}"); + } + } + else + { + if (nullify) + { + if (negated) + strcpy (buf, + "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}"); + } + else + { + if (negated) + strcpy (buf, + "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}"); + else + strcpy (buf, + "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}"); + } + } + + output_asm_insn (buf, operands); + return output_lbranch (operands[0], insn, xdelay); + } + return buf; +} + +/* This routine handles output of long unconditional branches that + exceed the maximum range of a simple branch instruction. Since + we don't have a register available for the branch, we save register + %r1 in the frame marker, load the branch destination DEST into %r1, + execute the branch, and restore %r1 in the delay slot of the branch. + + Since long branches may have an insn in the delay slot and the + delay slot is used to restore %r1, we in general need to extract + this insn and execute it before the branch. However, to facilitate + use of this function by conditional branches, we also provide an + option to not extract the delay insn so that it will be emitted + after the long branch. So, if there is an insn in the delay slot, + it is extracted if XDELAY is nonzero. + + The lengths of the various long-branch sequences are 20, 16 and 24 + bytes for the portable runtime, non-PIC and PIC cases, respectively. */ + +const char * +output_lbranch (rtx dest, rtx insn, int xdelay) +{ + rtx xoperands[2]; + + xoperands[0] = dest; + + /* First, free up the delay slot. */ + if (xdelay && dbr_sequence_length () != 0) + { + /* We can't handle a jump in the delay slot. 
*/ + gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN); + + final_scan_insn (NEXT_INSN (insn), asm_out_file, + optimize, 0, NULL); + + /* Now delete the delay insn. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + } + + /* Output an insn to save %r1. The runtime documentation doesn't + specify whether the "Clean Up" slot in the callers frame can + be clobbered by the callee. It isn't copied by HP's builtin + alloca, so this suggests that it can be clobbered if necessary. + The "Static Link" location is copied by HP builtin alloca, so + we avoid using it. Using the cleanup slot might be a problem + if we have to interoperate with languages that pass cleanup + information. However, it should be possible to handle these + situations with GCC's asm feature. + + The "Current RP" slot is reserved for the called procedure, so + we try to use it when we don't have a frame of our own. It's + rather unlikely that we won't have a frame when we need to emit + a very long branch. + + Really the way to go long term is a register scavenger; goto + the target of the jump and find a register which we can use + as a scratch to hold the value in %r1. Then, we wouldn't have + to free up the delay slot or clobber a slot that may be needed + for other purposes. */ + if (TARGET_64BIT) + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + /* Use the return pointer slot in the frame marker. */ + output_asm_insn ("std %%r1,-16(%%r30)", xoperands); + else + /* Use the slot at -40 in the frame marker since HP builtin + alloca doesn't copy it. */ + output_asm_insn ("std %%r1,-40(%%r30)", xoperands); + } + else + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + /* Use the return pointer slot in the frame marker. */ + output_asm_insn ("stw %%r1,-20(%%r30)", xoperands); + else + /* Use the "Clean Up" slot in the frame marker. In GCC, + the only other use of this location is for copying a + floating point double argument from a floating-point + register to two general registers. The copy is done + as an "atomic" operation when outputting a call, so it + won't interfere with our using the location here. */ + output_asm_insn ("stw %%r1,-12(%%r30)", xoperands); + } + + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn ("ldil L'%0,%%r1", xoperands); + output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + else if (flag_pic) + { + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + if (TARGET_SOM || !TARGET_GAS) + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); + } + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + else + /* Now output a very long branch to the original target. */ + output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands); + + /* Now restore the value of %r1 in the delay slot. */ + if (TARGET_64BIT) + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + return "ldd -16(%%r30),%%r1"; + else + return "ldd -40(%%r30),%%r1"; + } + else + { + if (actual_fsize == 0 && !df_regs_ever_live_p (2)) + return "ldw -20(%%r30),%%r1"; + else + return "ldw -12(%%r30),%%r1"; + } +} + +/* This routine handles all the branch-on-bit conditional branch sequences we + might need to generate. 
It handles nullification of delay slots, + varying length branches, negated branches and all combinations of the + above. it returns the appropriate output template to emit the branch. */ + +const char * +output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which) +{ + static char buf[100]; + bool useskip; + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + int xdelay; + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. I do not think this can happen as this pattern + is only used when optimizing; jump optimization should eliminate the + jump. But be prepared just in case. */ + + if (branch_to_delay_slot_p (insn)) + return "nop"; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with a + extrs instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; + + switch (length) + { + + /* All short conditional branches except backwards with an unfilled + delay slot. */ + case 4: + if (useskip) + strcpy (buf, "{extrs,|extrw,s,}"); + else + strcpy (buf, "bb,"); + if (useskip && GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + else if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (useskip) + strcat (buf, " %0,%1,1,%%r0"); + else if (nullify && negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, ",n %0,%1,%3%#"); + else + strcat (buf, ",n %0,%1,%3"); + } + else if (nullify && ! negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, ",n %0,%1,%2%#"); + else + strcat (buf, ",n %0,%1,%2"); + } + else if (! nullify && negated) + strcat (buf, " %0,%1,%3"); + else if (! nullify && ! negated) + strcat (buf, " %0,%1,%2"); + break; + + /* All long conditionals. Note a short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "bb,"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (negated) + strcat (buf, ",n %0,%1,.+12\n\tb %3"); + else + strcat (buf, ",n %0,%1,.+12\n\tb %2"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a bb;nop rather than extrs;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + { + strcpy (buf, "bb,"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && ! 
negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (negated) + strcat (buf, " %0,%1,%3%#"); + else + strcat (buf, " %0,%1,%2%#"); + } + else + { + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + else + strcpy (buf, "{extrs,|extrw,s,}"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify && negated) + strcat (buf, " %0,%1,1,%%r0\n\tb,n %3"); + else if (nullify && ! negated) + strcat (buf, " %0,%1,1,%%r0\n\tb,n %2"); + else if (negated) + strcat (buf, " %0,%1,1,%%r0\n\tb %3"); + else + strcat (buf, " %0,%1,1,%%r0\n\tb %2"); + } + break; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. */ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + else + strcpy (buf, "bb,"); + if ((which == 0 && negated) + || (which == 1 && !negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify) + strcat (buf, ",n %0,%1,.+%4"); + else + strcat (buf, " %0,%1,.+%4"); + output_asm_insn (buf, operands); + return output_lbranch (negated ? operands[3] : operands[2], + insn, xdelay); + } + return buf; +} + +/* This routine handles all the branch-on-variable-bit conditional branch + sequences we might need to generate. It handles nullification of delay + slots, varying length branches, negated branches and all combinations + of the above. it returns the appropriate output template to emit the + branch. */ + +const char * +output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which) +{ + static char buf[100]; + bool useskip; + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + int xdelay; + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. I do not think this can happen as this pattern + is only used when optimizing; jump optimization should eliminate the + jump. But be prepared just in case. */ + + if (branch_to_delay_slot_p (insn)) + return "nop"; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with a + extrs instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; + + switch (length) + { + + /* All short conditional branches except backwards with an unfilled + delay slot. 
*/ + case 4: + if (useskip) + strcpy (buf, "{vextrs,|extrw,s,}"); + else + strcpy (buf, "{bvb,|bb,}"); + if (useskip && GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + else if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (useskip) + strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}"); + else if (nullify && negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}"); + else + strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}"); + } + else if (nullify && ! negated) + { + if (branch_needs_nop_p (insn)) + strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}"); + else + strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}"); + } + else if (! nullify && negated) + strcat (buf, "{ %0,%3| %0,%%sar,%3}"); + else if (! nullify && ! negated) + strcat (buf, "{ %0,%2| %0,%%sar,%2}"); + break; + + /* All long conditionals. Note a short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "{bvb,|bb,}"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (negated) + strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}"); + else + strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a bb;nop rather than extrs;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + { + strcpy (buf, "{bvb,|bb,}"); + if (GET_MODE (operands[0]) == DImode) + strcat (buf, "*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (negated) + strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}"); + else + strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}"); + } + else + { + strcpy (buf, "{vextrs,|extrw,s,}"); + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "extrd,s,*"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify && negated) + strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}"); + else if (nullify && ! negated) + strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}"); + else if (negated) + strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}"); + else + strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}"); + } + break; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. 
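+	 Concretely, we emit a short branch on the opposite condition to .+%4,
+	 where %4 is the distance from that branch to the first instruction past
+	 the long branch emitted by output_lbranch (one extra word is added when
+	 the copied delay-slot insn must be emitted as well).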
*/ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (GET_MODE (operands[0]) == DImode) + strcpy (buf, "bb,*"); + else + strcpy (buf, "{bvb,|bb,}"); + if ((which == 0 && negated) + || (which == 1 && !negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify) + strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}"); + else + strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}"); + output_asm_insn (buf, operands); + return output_lbranch (negated ? operands[3] : operands[2], + insn, xdelay); + } + return buf; +} + +/* Return the output template for emitting a dbra type insn. + + Note it may perform some output operations on its own before + returning the final output string. */ +const char * +output_dbra (rtx *operands, rtx insn, int which_alternative) +{ + int length = get_attr_length (insn); + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. Be prepared! */ + + if (branch_to_delay_slot_p (insn)) + { + if (which_alternative == 0) + return "ldo %1(%0),%0"; + else if (which_alternative == 1) + { + output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands); + output_asm_insn ("ldw -16(%%r30),%4", operands); + output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); + return "{fldws|fldw} -16(%%r30),%0"; + } + else + { + output_asm_insn ("ldw %0,%4", operands); + return "ldo %1(%4),%4\n\tstw %4,%0"; + } + } + + if (which_alternative == 0) + { + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int xdelay; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + switch (length) + { + case 4: + if (nullify) + { + if (branch_needs_nop_p (insn)) + return "addib,%C2,n %1,%0,%3%#"; + else + return "addib,%C2,n %1,%0,%3"; + } + else + return "addib,%C2 %1,%0,%3"; + + case 8: + /* Handle weird backwards branch with a fulled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + return "addib,%N2,n %1,%0,.+12\n\tb %3"; + /* Handle short backwards branch with an unfilled delay slot. + Using a addb;nop rather than addi;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + return "addib,%C2 %1,%0,%3%#"; + + /* Handle normal cases. */ + if (nullify) + return "addi,%N2 %1,%0,%0\n\tb,n %3"; + else + return "addi,%N2 %1,%0,%0\n\tb %3"; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. 
*/ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (nullify) + output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands); + else + output_asm_insn ("addib,%N2 %1,%0,.+%4", operands); + + return output_lbranch (operands[3], insn, xdelay); + } + + } + /* Deal with gross reload from FP register case. */ + else if (which_alternative == 1) + { + /* Move loop counter from FP register to MEM then into a GR, + increment the GR, store the GR into MEM, and finally reload + the FP register from MEM from within the branch's delay slot. */ + output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4", + operands); + output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); + if (length == 24) + return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0"; + else if (length == 28) + return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; + else + { + operands[5] = GEN_INT (length - 16); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands); + output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); + return output_lbranch (operands[3], insn, 0); + } + } + /* Deal with gross reload from memory case. */ + else + { + /* Reload loop counter from memory, the store back to memory + happens in the branch's delay slot. */ + output_asm_insn ("ldw %0,%4", operands); + if (length == 12) + return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; + else if (length == 16) + return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0"; + else + { + operands[5] = GEN_INT (length - 4); + output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands); + return output_lbranch (operands[3], insn, 0); + } + } +} + +/* Return the output template for emitting a movb type insn. + + Note it may perform some output operations on its own before + returning the final output string. */ +const char * +output_movb (rtx *operands, rtx insn, int which_alternative, + int reverse_comparison) +{ + int length = get_attr_length (insn); + + /* A conditional branch to the following instruction (e.g. the delay slot) is + asking for a disaster. Be prepared! */ + + if (branch_to_delay_slot_p (insn)) + { + if (which_alternative == 0) + return "copy %1,%0"; + else if (which_alternative == 1) + { + output_asm_insn ("stw %1,-16(%%r30)", operands); + return "{fldws|fldw} -16(%%r30),%0"; + } + else if (which_alternative == 2) + return "stw %1,%0"; + else + return "mtsar %r1"; + } + + /* Support the second variant. */ + if (reverse_comparison) + PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); + + if (which_alternative == 0) + { + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int xdelay; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + switch (length) + { + case 4: + if (nullify) + { + if (branch_needs_nop_p (insn)) + return "movb,%C2,n %1,%0,%3%#"; + else + return "movb,%C2,n %1,%0,%3"; + } + else + return "movb,%C2 %1,%0,%3"; + + case 8: + /* Handle weird backwards branch with a filled delay slot + which is nullified. 
*/ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + return "movb,%N2,n %1,%0,.+12\n\tb %3"; + + /* Handle short backwards branch with an unfilled delay slot. + Using a movb;nop rather than or;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && INSN_ADDRESSES_SET_P () + && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) + - INSN_ADDRESSES (INSN_UID (insn)) - 8)) + return "movb,%C2 %1,%0,%3%#"; + /* Handle normal cases. */ + if (nullify) + return "or,%N2 %1,%%r0,%0\n\tb,n %3"; + else + return "or,%N2 %1,%%r0,%0\n\tb %3"; + + default: + /* The reversed conditional branch must branch over one additional + instruction if the delay slot is filled and needs to be extracted + by output_lbranch. If the delay slot is empty or this is a + nullified forward branch, the instruction after the reversed + condition branch must be nullified. */ + if (dbr_sequence_length () == 0 + || (nullify && forward_branch_p (insn))) + { + nullify = 1; + xdelay = 0; + operands[4] = GEN_INT (length); + } + else + { + xdelay = 1; + operands[4] = GEN_INT (length + 4); + } + + if (nullify) + output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands); + else + output_asm_insn ("movb,%N2 %1,%0,.+%4", operands); + + return output_lbranch (operands[3], insn, xdelay); + } + } + /* Deal with gross reload for FP destination register case. */ + else if (which_alternative == 1) + { + /* Move source register to MEM, perform the branch test, then + finally load the FP register from MEM from within the branch's + delay slot. */ + output_asm_insn ("stw %1,-16(%%r30)", operands); + if (length == 12) + return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0"; + else if (length == 16) + return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; + else + { + operands[4] = GEN_INT (length - 4); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands); + output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); + return output_lbranch (operands[3], insn, 0); + } + } + /* Deal with gross reload from memory case. */ + else if (which_alternative == 2) + { + /* Reload loop counter from memory, the store back to memory + happens in the branch's delay slot. */ + if (length == 8) + return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0"; + else if (length == 12) + return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0"; + else + { + operands[4] = GEN_INT (length); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0", + operands); + return output_lbranch (operands[3], insn, 0); + } + } + /* Handle SAR as a destination. */ + else + { + if (length == 8) + return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1"; + else if (length == 12) + return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1"; + else + { + operands[4] = GEN_INT (length); + output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1", + operands); + return output_lbranch (operands[3], insn, 0); + } + } +} + +/* Copy any FP arguments in INSN into integer registers. */ +static void +copy_fp_args (rtx insn) +{ + rtx link; + rtx xoperands[2]; + + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + { + int arg_mode, regno; + rtx use = XEXP (link, 0); + + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + + /* Is it a floating point register? 
*/ + if (regno >= 32 && regno <= 39) + { + /* Copy the FP register into an integer register via memory. */ + if (arg_mode == SFmode) + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); + output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + else + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); + output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + } + } +} + +/* Compute length of the FP argument copy sequence for INSN. */ +static int +length_fp_args (rtx insn) +{ + int length = 0; + rtx link; + + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + { + int arg_mode, regno; + rtx use = XEXP (link, 0); + + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + + /* Is it a floating point register? */ + if (regno >= 32 && regno <= 39) + { + if (arg_mode == SFmode) + length += 8; + else + length += 12; + } + } + + return length; +} + +/* Return the attribute length for the millicode call instruction INSN. + The length must match the code generated by output_millicode_call. + We include the delay slot in the returned length as it is better to + over estimate the length than to under estimate it. */ + +int +attr_length_millicode_call (rtx insn) +{ + unsigned long distance = -1; + unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; + + if (INSN_ADDRESSES_SET_P ()) + { + distance = (total + insn_current_reference_address (insn)); + if (distance < total) + distance = -1; + } + + if (TARGET_64BIT) + { + if (!TARGET_LONG_CALLS && distance < 7600000) + return 8; + + return 20; + } + else if (TARGET_PORTABLE_RUNTIME) + return 24; + else + { + if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET) + return 8; + + if (TARGET_LONG_ABS_CALL && !flag_pic) + return 12; + + return 24; + } +} + +/* INSN is a function call. It may have an unconditional jump + in its delay slot. + + CALL_DEST is the routine we are calling. */ + +const char * +output_millicode_call (rtx insn, rtx call_dest) +{ + int attr_length = get_attr_length (insn); + int seq_length = dbr_sequence_length (); + int distance; + rtx seq_insn; + rtx xoperands[3]; + + xoperands[0] = call_dest; + xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31); + + /* Handle the common case where we are sure that the branch will + reach the beginning of the $CODE$ subspace. The within reach + form of the $$sh_func_adrs call has a length of 28. Because it + has an attribute type of sh_func_adrs, it never has a nonzero + sequence length (i.e., the delay slot is never filled). */ + if (!TARGET_LONG_CALLS + && (attr_length == 8 + || (attr_length == 28 + && get_attr_type (insn) == TYPE_SH_FUNC_ADRS))) + { + output_asm_insn ("{bl|b,l} %0,%2", xoperands); + } + else + { + if (TARGET_64BIT) + { + /* It might seem that one insn could be saved by accessing + the millicode function using the linkage table. However, + this doesn't work in shared libraries and other dynamically + loaded objects. Using a pc-relative sequence also avoids + problems related to the implicit use of the gp register. 
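+	     The b,l .+8 below only serves to capture a nearby pc value in %r1;
+	     the addil/ldo pair that follows then forms the millicode entry
+	     point relative to it.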
*/ + output_asm_insn ("b,l .+8,%%r1", xoperands); + + if (TARGET_GAS) + { + output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); + } + else + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); + } + + output_asm_insn ("bve,l (%%r1),%%r2", xoperands); + } + else if (TARGET_PORTABLE_RUNTIME) + { + /* Pure portable runtime doesn't allow be/ble; we also don't + have PIC support in the assembler/linker, so this sequence + is needed. */ + + /* Get the address of our target into %r1. */ + output_asm_insn ("ldil L'%0,%%r1", xoperands); + output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); + + /* Get our return address into %r31. */ + output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands); + output_asm_insn ("addi 8,%%r31,%%r31", xoperands); + + /* Jump to our target address in %r1. */ + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + else if (!flag_pic) + { + output_asm_insn ("ldil L'%0,%%r1", xoperands); + if (TARGET_PA_20) + output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands); + else + output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); + } + else + { + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + output_asm_insn ("addi 16,%%r1,%%r31", xoperands); + + if (TARGET_SOM || !TARGET_GAS) + { + /* The HP assembler can generate relocations for the + difference of two symbols. GAS can do this for a + millicode symbol but not an arbitrary external + symbol when generating SOM output. */ + xoperands[1] = gen_label_rtx (); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); + output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1", + xoperands); + } + + /* Jump to our target address in %r1. */ + output_asm_insn ("bv %%r0(%%r1)", xoperands); + } + } + + if (seq_length == 0) + output_asm_insn ("nop", xoperands); + + /* We are done if there isn't a jump in the delay slot. */ + if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) + return ""; + + /* This call has an unconditional jump in its delay slot. */ + xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + + /* See if the return address can be adjusted. Use the containing + sequence insn's address. */ + if (INSN_ADDRESSES_SET_P ()) + { + seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); + distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) + - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); + + if (VAL_14_BITS_P (distance)) + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("ldo %0-%1(%2),%2", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + } + else + /* ??? This branch may not reach its target. */ + output_asm_insn ("nop\n\tb,n %0", xoperands); + } + else + /* ??? This branch may not reach its target. */ + output_asm_insn ("nop\n\tb,n %0", xoperands); + + /* Delete the jump. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + + return ""; +} + +/* Return the attribute length of the call instruction INSN. The SIBCALL + flag indicates whether INSN is a regular call or a sibling call. 
The + length returned must be longer than the code actually generated by + output_call. Since branch shortening is done before delay branch + sequencing, there is no way to determine whether or not the delay + slot will be filled during branch shortening. Even when the delay + slot is filled, we may have to add a nop if the delay slot contains + a branch that can't reach its target. Thus, we always have to include + the delay slot in the length estimate. This used to be done in + pa_adjust_insn_length but we do it here now as some sequences always + fill the delay slot and we can save four bytes in the estimate for + these sequences. */ + +int +attr_length_call (rtx insn, int sibcall) +{ + int local_call; + rtx call, call_dest; + tree call_decl; + int length = 0; + rtx pat = PATTERN (insn); + unsigned long distance = -1; + + gcc_assert (GET_CODE (insn) == CALL_INSN); + + if (INSN_ADDRESSES_SET_P ()) + { + unsigned long total; + + total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; + distance = (total + insn_current_reference_address (insn)); + if (distance < total) + distance = -1; + } + + gcc_assert (GET_CODE (pat) == PARALLEL); + + /* Get the call rtx. */ + call = XVECEXP (pat, 0, 0); + if (GET_CODE (call) == SET) + call = SET_SRC (call); + + gcc_assert (GET_CODE (call) == CALL); + + /* Determine if this is a local call. */ + call_dest = XEXP (XEXP (call, 0), 0); + call_decl = SYMBOL_REF_DECL (call_dest); + local_call = call_decl && targetm.binds_local_p (call_decl); + + /* pc-relative branch. */ + if (!TARGET_LONG_CALLS + && ((TARGET_PA_20 && !sibcall && distance < 7600000) + || distance < MAX_PCREL17F_OFFSET)) + length += 8; + + /* 64-bit plabel sequence. */ + else if (TARGET_64BIT && !local_call) + length += sibcall ? 28 : 24; + + /* non-pic long absolute branch sequence. */ + else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) + length += 12; + + /* long pc-relative branch sequence. */ + else if (TARGET_LONG_PIC_SDIFF_CALL + || (TARGET_GAS && !TARGET_SOM + && (TARGET_LONG_PIC_PCREL_CALL || local_call))) + { + length += 20; + + if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) + length += 8; + } + + /* 32-bit plabel sequence. */ + else + { + length += 32; + + if (TARGET_SOM) + length += length_fp_args (insn); + + if (flag_pic) + length += 4; + + if (!TARGET_PA_20) + { + if (!sibcall) + length += 8; + + if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) + length += 8; + } + } + + return length; +} + +/* INSN is a function call. It may have an unconditional jump + in its delay slot. + + CALL_DEST is the routine we are calling. */ + +const char * +output_call (rtx insn, rtx call_dest, int sibcall) +{ + int delay_insn_deleted = 0; + int delay_slot_filled = 0; + int seq_length = dbr_sequence_length (); + tree call_decl = SYMBOL_REF_DECL (call_dest); + int local_call = call_decl && targetm.binds_local_p (call_decl); + rtx xoperands[2]; + + xoperands[0] = call_dest; + + /* Handle the common case where we're sure that the branch will reach + the beginning of the "$CODE$" subspace. This is the beginning of + the current function if we are in a named section. */ + if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8) + { + xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2); + output_asm_insn ("{bl|b,l} %0,%1", xoperands); + } + else + { + if (TARGET_64BIT && !local_call) + { + /* ??? As far as I can tell, the HP linker doesn't support the + long pc-relative sequence described in the 64-bit runtime + architecture. 
So, we use a slightly longer indirect call. */ + xoperands[0] = get_deferred_plabel (call_dest); + xoperands[1] = gen_label_rtx (); + + /* If this isn't a sibcall, we put the load of %r27 into the + delay slot. We can't do this in a sibcall as we don't + have a second call-clobbered scratch register available. */ + if (seq_length != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN + && !sibcall) + { + final_scan_insn (NEXT_INSN (insn), asm_out_file, + optimize, 0, NULL); + + /* Now delete the delay insn. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + delay_insn_deleted = 1; + } + + output_asm_insn ("addil LT'%0,%%r27", xoperands); + output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); + output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); + + if (sibcall) + { + output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); + output_asm_insn ("ldd 16(%%r1),%%r1", xoperands); + output_asm_insn ("bve (%%r1)", xoperands); + } + else + { + output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); + output_asm_insn ("bve,l (%%r2),%%r2", xoperands); + output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); + delay_slot_filled = 1; + } + } + else + { + int indirect_call = 0; + + /* Emit a long call. There are several different sequences + of increasing length and complexity. In most cases, + they don't allow an instruction in the delay slot. */ + if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) + && !TARGET_LONG_PIC_SDIFF_CALL + && !(TARGET_GAS && !TARGET_SOM + && (TARGET_LONG_PIC_PCREL_CALL || local_call)) + && !TARGET_64BIT) + indirect_call = 1; + + if (seq_length != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN + && !sibcall + && (!TARGET_PA_20 + || indirect_call + || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic))) + { + /* A non-jump insn in the delay slot. By definition we can + emit this insn before the call (and in fact before argument + relocating. */ + final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, + NULL); + + /* Now delete the delay insn. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + delay_insn_deleted = 1; + } + + if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) + { + /* This is the best sequence for making long calls in + non-pic code. Unfortunately, GNU ld doesn't provide + the stub needed for external calls, and GAS's support + for this with the SOM linker is buggy. It is safe + to use this for local calls. */ + output_asm_insn ("ldil L'%0,%%r1", xoperands); + if (sibcall) + output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands); + else + { + if (TARGET_PA_20) + output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", + xoperands); + else + output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); + + output_asm_insn ("copy %%r31,%%r2", xoperands); + delay_slot_filled = 1; + } + } + else + { + if (TARGET_LONG_PIC_SDIFF_CALL) + { + /* The HP assembler and linker can handle relocations + for the difference of two symbols. The HP assembler + recognizes the sequence as a pc-relative call and + the linker provides stubs when needed. */ + xoperands[1] = gen_label_rtx (); + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); + } + else if (TARGET_GAS && !TARGET_SOM + && (TARGET_LONG_PIC_PCREL_CALL || local_call)) + { + /* GAS currently can't generate the relocations that + are needed for the SOM linker under HP-UX using this + sequence. 
The GNU linker doesn't generate the stubs + that are needed for external calls on TARGET_ELF32 + with this sequence. For now, we have to use a + longer plabel sequence when using GAS. */ + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", + xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", + xoperands); + } + else + { + /* Emit a long plabel-based call sequence. This is + essentially an inline implementation of $$dyncall. + We don't actually try to call $$dyncall as this is + as difficult as calling the function itself. */ + xoperands[0] = get_deferred_plabel (call_dest); + xoperands[1] = gen_label_rtx (); + + /* Since the call is indirect, FP arguments in registers + need to be copied to the general registers. Then, the + argument relocation stub will copy them back. */ + if (TARGET_SOM) + copy_fp_args (insn); + + if (flag_pic) + { + output_asm_insn ("addil LT'%0,%%r19", xoperands); + output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); + output_asm_insn ("ldw 0(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil LR'%0-$global$,%%r27", + xoperands); + output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1", + xoperands); + } + + output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r1", xoperands); + output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands); + + if (!sibcall && !TARGET_PA_20) + { + output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands); + if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) + output_asm_insn ("addi 8,%%r2,%%r2", xoperands); + else + output_asm_insn ("addi 16,%%r2,%%r2", xoperands); + } + } + + if (TARGET_PA_20) + { + if (sibcall) + output_asm_insn ("bve (%%r1)", xoperands); + else + { + if (indirect_call) + { + output_asm_insn ("bve,l (%%r1),%%r2", xoperands); + output_asm_insn ("stw %%r2,-24(%%sp)", xoperands); + delay_slot_filled = 1; + } + else + output_asm_insn ("bve,l (%%r1),%%r2", xoperands); + } + } + else + { + if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) + output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0", + xoperands); + + if (sibcall) + { + if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) + output_asm_insn ("be 0(%%sr4,%%r1)", xoperands); + else + output_asm_insn ("be 0(%%sr0,%%r1)", xoperands); + } + else + { + if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) + output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands); + else + output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands); + + if (indirect_call) + output_asm_insn ("stw %%r31,-24(%%sp)", xoperands); + else + output_asm_insn ("copy %%r31,%%r2", xoperands); + delay_slot_filled = 1; + } + } + } + } + } + + if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted)) + output_asm_insn ("nop", xoperands); + + /* We are done if there isn't a jump in the delay slot. */ + if (seq_length == 0 + || delay_insn_deleted + || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) + return ""; + + /* A sibcall should never have a branch in the delay slot. */ + gcc_assert (!sibcall); + + /* This call has an unconditional jump in its delay slot. */ + xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + + if (!delay_slot_filled && INSN_ADDRESSES_SET_P ()) + { + /* See if the return address can be adjusted. Use the containing + sequence insn's address. This would break the regular call/return@ + relationship assumed by the table based eh unwinder, so only do that + if the call is not possibly throwing. 
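+	 Adjusting here means using an ldo to bump %r2 so that the call returns
+	 directly to the jump's target, which in turn lets us delete the jump
+	 from the delay slot.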
*/ + rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); + int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) + - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); + + if (VAL_14_BITS_P (distance) + && !(can_throw_internal (insn) || can_throw_external (insn))) + { + xoperands[1] = gen_label_rtx (); + output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + } + else + output_asm_insn ("nop\n\tb,n %0", xoperands); + } + else + output_asm_insn ("b,n %0", xoperands); + + /* Delete the jump. */ + SET_INSN_DELETED (NEXT_INSN (insn)); + + return ""; +} + +/* Return the attribute length of the indirect call instruction INSN. + The length must match the code generated by output_indirect call. + The returned length includes the delay slot. Currently, the delay + slot of an indirect call sequence is not exposed and it is used by + the sequence itself. */ + +int +attr_length_indirect_call (rtx insn) +{ + unsigned long distance = -1; + unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; + + if (INSN_ADDRESSES_SET_P ()) + { + distance = (total + insn_current_reference_address (insn)); + if (distance < total) + distance = -1; + } + + if (TARGET_64BIT) + return 12; + + if (TARGET_FAST_INDIRECT_CALLS + || (!TARGET_PORTABLE_RUNTIME + && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) + || distance < MAX_PCREL17F_OFFSET))) + return 8; + + if (flag_pic) + return 24; + + if (TARGET_PORTABLE_RUNTIME) + return 20; + + /* Out of reach, can use ble. */ + return 12; +} + +const char * +output_indirect_call (rtx insn, rtx call_dest) +{ + rtx xoperands[1]; + + if (TARGET_64BIT) + { + xoperands[0] = call_dest; + output_asm_insn ("ldd 16(%0),%%r2", xoperands); + output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands); + return ""; + } + + /* First the special case for kernels, level 0 systems, etc. */ + if (TARGET_FAST_INDIRECT_CALLS) + return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; + + /* Now the normal case -- we can reach $$dyncall directly or + we're sure that we can get there via a long-branch stub. + + No need to check target flags as the length uniquely identifies + the remaining cases. */ + if (attr_length_indirect_call (insn) == 8) + { + /* The HP linker sometimes substitutes a BLE for BL/B,L calls to + $$dyncall. Since BLE uses %r31 as the link register, the 22-bit + variant of the B,L instruction can't be used on the SOM target. */ + if (TARGET_PA_20 && !TARGET_SOM) + return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31"; + else + return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2"; + } + + /* Long millicode call, but we are not generating PIC or portable runtime + code. */ + if (attr_length_indirect_call (insn) == 12) + return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; + + /* Long millicode call for portable runtime. */ + if (attr_length_indirect_call (insn) == 20) + return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop"; + + /* We need a long PIC call to $$dyncall. 
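+     As in the millicode case, the address of $$dyncall is formed
+     pc-relatively in %r1; the blr sets up the return pointer in %r2 before
+     the bv,n finally transfers there.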
*/ + xoperands[0] = NULL_RTX; + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + if (TARGET_SOM || !TARGET_GAS) + { + xoperands[0] = gen_label_rtx (); + output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[0])); + output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands); + } + else + { + output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1", + xoperands); + } + output_asm_insn ("blr %%r0,%%r2", xoperands); + output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands); + return ""; +} + +/* Return the total length of the save and restore instructions needed for + the data linkage table pointer (i.e., the PIC register) across the call + instruction INSN. No-return calls do not require a save and restore. + In addition, we may be able to avoid the save and restore for calls + within the same translation unit. */ + +int +attr_length_save_restore_dltp (rtx insn) +{ + if (find_reg_note (insn, REG_NORETURN, NULL_RTX)) + return 0; + + return 8; +} + +/* In HPUX 8.0's shared library scheme, special relocations are needed + for function labels if they might be passed to a function + in a shared library (because shared libraries don't live in code + space), and special magic is needed to construct their address. */ + +void +hppa_encode_label (rtx sym) +{ + const char *str = XSTR (sym, 0); + int len = strlen (str) + 1; + char *newstr, *p; + + p = newstr = XALLOCAVEC (char, len + 1); + *p++ = '@'; + strcpy (p, str); + + XSTR (sym, 0) = ggc_alloc_string (newstr, len); +} + +static void +pa_encode_section_info (tree decl, rtx rtl, int first) +{ + int old_referenced = 0; + + if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF) + old_referenced + = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED; + + default_encode_section_info (decl, rtl, first); + + if (first && TEXT_SPACE_P (decl)) + { + SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; + if (TREE_CODE (decl) == FUNCTION_DECL) + hppa_encode_label (XEXP (rtl, 0)); + } + else if (old_referenced) + SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced; +} + +/* This is sort of inverse to pa_encode_section_info. */ + +static const char * +pa_strip_name_encoding (const char *str) +{ + str += (*str == '@'); + str += (*str == '*'); + return str; +} + +int +function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0)); +} + +/* Returns 1 if OP is a function label involved in a simple addition + with a constant. Used to keep certain patterns from matching + during instruction combination. */ +int +is_function_label_plus_const (rtx op) +{ + /* Strip off any CONST. */ + if (GET_CODE (op) == CONST) + op = XEXP (op, 0); + + return (GET_CODE (op) == PLUS + && function_label_operand (XEXP (op, 0), Pmode) + && GET_CODE (XEXP (op, 1)) == CONST_INT); +} + +/* Output assembly code for a thunk to FUNCTION. 
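+   The thunk adjusts the incoming this pointer, which arrives in %r26 (the
+   first argument register), by DELTA and then branches directly to FUNCTION.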
*/ + +static void +pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree function) +{ + static unsigned int current_thunk_number; + int val_14 = VAL_14_BITS_P (delta); + unsigned int old_last_address = last_address, nbytes = 0; + char label[16]; + rtx xoperands[4]; + + xoperands[0] = XEXP (DECL_RTL (function), 0); + xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0); + xoperands[2] = GEN_INT (delta); + + ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0)); + fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n"); + + /* Output the thunk. We know that the function is in the same + translation unit (i.e., the same space) as the thunk, and that + thunks are output after their method. Thus, we don't need an + external branch to reach the function. With SOM and GAS, + functions and thunks are effectively in different sections. + Thus, we can always use a IA-relative branch and the linker + will add a long branch stub if necessary. + + However, we have to be careful when generating PIC code on the + SOM port to ensure that the sequence does not transfer to an + import stub for the target function as this could clobber the + return value saved at SP-24. This would also apply to the + 32-bit linux port if the multi-space model is implemented. */ + if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME + && !(flag_pic && TREE_PUBLIC (function)) + && (TARGET_GAS || last_address < 262132)) + || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME + && ((targetm.have_named_sections + && DECL_SECTION_NAME (thunk_fndecl) != NULL + /* The GNU 64-bit linker has rather poor stub management. + So, we use a long branch from thunks that aren't in + the same section as the target function. */ + && ((!TARGET_64BIT + && (DECL_SECTION_NAME (thunk_fndecl) + != DECL_SECTION_NAME (function))) + || ((DECL_SECTION_NAME (thunk_fndecl) + == DECL_SECTION_NAME (function)) + && last_address < 262132))) + || (targetm.have_named_sections + && DECL_SECTION_NAME (thunk_fndecl) == NULL + && DECL_SECTION_NAME (function) == NULL + && last_address < 262132) + || (!targetm.have_named_sections && last_address < 262132)))) + { + if (!val_14) + output_asm_insn ("addil L'%2,%%r26", xoperands); + + output_asm_insn ("b %0", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 8; + } + else + { + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 12; + } + } + else if (TARGET_64BIT) + { + /* We only have one call-clobbered scratch register, so we can't + make use of the delay slot if delta doesn't fit in 14 bits. */ + if (!val_14) + { + output_asm_insn ("addil L'%2,%%r26", xoperands); + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + } + + output_asm_insn ("b,l .+8,%%r1", xoperands); + + if (TARGET_GAS) + { + output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); + } + else + { + xoperands[3] = GEN_INT (val_14 ? 
8 : 16); + output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands); + } + + if (val_14) + { + output_asm_insn ("bv %%r0(%%r1)", xoperands); + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 20; + } + else + { + output_asm_insn ("bv,n %%r0(%%r1)", xoperands); + nbytes += 24; + } + } + else if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn ("ldil L'%0,%%r1", xoperands); + output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); + + if (!val_14) + output_asm_insn ("addil L'%2,%%r26", xoperands); + + output_asm_insn ("bv %%r0(%%r22)", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 16; + } + else + { + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 20; + } + } + else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) + { + /* The function is accessible from outside this module. The only + way to avoid an import stub between the thunk and function is to + call the function directly with an indirect sequence similar to + that used by $$dyncall. This is possible because $$dyncall acts + as the import stub in an indirect call. */ + ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); + xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); + output_asm_insn ("addil LT'%3,%%r19", xoperands); + output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); + output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r22", xoperands); + output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); + output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); + + if (!val_14) + { + output_asm_insn ("addil L'%2,%%r26", xoperands); + nbytes += 4; + } + + if (TARGET_PA_20) + { + output_asm_insn ("bve (%%r22)", xoperands); + nbytes += 36; + } + else if (TARGET_NO_SPACE_REGS) + { + output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); + nbytes += 36; + } + else + { + output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); + output_asm_insn ("mtsp %%r21,%%sr0", xoperands); + output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); + nbytes += 44; + } + + if (val_14) + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + else + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + } + else if (flag_pic) + { + output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); + + if (TARGET_SOM || !TARGET_GAS) + { + output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands); + output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands); + } + else + { + output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); + output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands); + } + + if (!val_14) + output_asm_insn ("addil L'%2,%%r26", xoperands); + + output_asm_insn ("bv %%r0(%%r22)", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 20; + } + else + { + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 24; + } + } + else + { + if (!val_14) + output_asm_insn ("addil L'%2,%%r26", xoperands); + + output_asm_insn ("ldil L'%0,%%r22", xoperands); + output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); + + if (val_14) + { + output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); + nbytes += 12; + } + else + { + output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); + nbytes += 16; + } + } + + fprintf (file, "\t.EXIT\n\t.PROCEND\n"); + + if (TARGET_SOM && TARGET_GAS) + { + /* We done with this subspace except possibly for some additional + debug information. 
Forget that we are in this subspace to ensure + that the next function is output in its own subspace. */ + in_section = NULL; + cfun->machine->in_nsubspa = 2; + } + + if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) + { + switch_to_section (data_section); + output_asm_insn (".align 4", xoperands); + ASM_OUTPUT_LABEL (file, label); + output_asm_insn (".word P'%0", xoperands); + } + + current_thunk_number++; + nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) + & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); + last_address += nbytes; + if (old_last_address > last_address) + last_address = UINT_MAX; + update_total_code_bytes (nbytes); +} + +/* Only direct calls to static functions are allowed to be sibling (tail) + call optimized. + + This restriction is necessary because some linker generated stubs will + store return pointers into rp' in some cases which might clobber a + live value already in rp'. + + In a sibcall the current function and the target function share stack + space. Thus if the path to the current function and the path to the + target function save a value in rp', they save the value into the + same stack slot, which has undesirable consequences. + + Because of the deferred binding nature of shared libraries any function + with external scope could be in a different load module and thus require + rp' to be saved when calling that function. So sibcall optimizations + can only be safe for static function. + + Note that GCC never needs return value relocations, so we don't have to + worry about static calls with return value relocations (which require + saving rp'). + + It is safe to perform a sibcall optimization when the target function + will never return. */ +static bool +pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + if (TARGET_PORTABLE_RUNTIME) + return false; + + /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in + single subspace mode and the call is not indirect. As far as I know, + there is no operating system support for the multiple subspace mode. + It might be possible to support indirect calls if we didn't use + $$dyncall (see the indirect sequence generated in output_call). */ + if (TARGET_ELF32) + return (decl != NULL_TREE); + + /* Sibcalls are not ok because the arg pointer register is not a fixed + register. This prevents the sibcall optimization from occurring. In + addition, there are problems with stub placement using GNU ld. This + is because a normal sibcall branch uses a 17-bit relocation while + a regular call branch uses a 22-bit relocation. As a result, more + care needs to be taken in the placement of long-branch stubs. */ + if (TARGET_64BIT) + return false; + + /* Sibcalls are only ok within a translation unit. */ + return (decl && !TREE_PUBLIC (decl)); +} + +/* ??? Addition is not commutative on the PA due to the weird implicit + space register selection rules for memory addresses. Therefore, we + don't consider a + b == b + a, as this might be inside a MEM. */ +static bool +pa_commutative_p (const_rtx x, int outer_code) +{ + return (COMMUTATIVE_P (x) + && (TARGET_NO_SPACE_REGS + || (outer_code != UNKNOWN && outer_code != MEM) + || GET_CODE (x) != PLUS)); +} + +/* Returns 1 if the 6 operands specified in OPERANDS are suitable for + use in fmpyadd instructions. */ +int +fmpyaddoperands (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + + /* Must be a floating point mode. */ + if (mode != SFmode && mode != DFmode) + return 0; + + /* All modes must be the same. */ + if (! 
(mode == GET_MODE (operands[1]) + && mode == GET_MODE (operands[2]) + && mode == GET_MODE (operands[3]) + && mode == GET_MODE (operands[4]) + && mode == GET_MODE (operands[5]))) + return 0; + + /* All operands must be registers. */ + if (! (GET_CODE (operands[1]) == REG + && GET_CODE (operands[2]) == REG + && GET_CODE (operands[3]) == REG + && GET_CODE (operands[4]) == REG + && GET_CODE (operands[5]) == REG)) + return 0; + + /* Only 2 real operands to the addition. One of the input operands must + be the same as the output operand. */ + if (! rtx_equal_p (operands[3], operands[4]) + && ! rtx_equal_p (operands[3], operands[5])) + return 0; + + /* Inout operand of add cannot conflict with any operands from multiply. */ + if (rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2])) + return 0; + + /* multiply cannot feed into addition operands. */ + if (rtx_equal_p (operands[4], operands[0]) + || rtx_equal_p (operands[5], operands[0])) + return 0; + + /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ + if (mode == SFmode + && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) + return 0; + + /* Passed. Operands are suitable for fmpyadd. */ + return 1; +} + +#if !defined(USE_COLLECT2) +static void +pa_asm_out_constructor (rtx symbol, int priority) +{ + if (!function_label_operand (symbol, VOIDmode)) + hppa_encode_label (symbol); + +#ifdef CTORS_SECTION_ASM_OP + default_ctor_section_asm_out_constructor (symbol, priority); +#else +# ifdef TARGET_ASM_NAMED_SECTION + default_named_section_asm_out_constructor (symbol, priority); +# else + default_stabs_asm_out_constructor (symbol, priority); +# endif +#endif +} + +static void +pa_asm_out_destructor (rtx symbol, int priority) +{ + if (!function_label_operand (symbol, VOIDmode)) + hppa_encode_label (symbol); + +#ifdef DTORS_SECTION_ASM_OP + default_dtor_section_asm_out_destructor (symbol, priority); +#else +# ifdef TARGET_ASM_NAMED_SECTION + default_named_section_asm_out_destructor (symbol, priority); +# else + default_stabs_asm_out_destructor (symbol, priority); +# endif +#endif +} +#endif + +/* This function places uninitialized global data in the bss section. + The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this + function on the SOM port to prevent uninitialized global data from + being placed in the data section. */ + +void +pa_asm_output_aligned_bss (FILE *stream, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + switch_to_section (bss_section); + fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); + +#ifdef ASM_OUTPUT_TYPE_DIRECTIVE + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); +#endif + +#ifdef ASM_OUTPUT_SIZE_DIRECTIVE + ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); +#endif + + fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); + ASM_OUTPUT_LABEL (stream, name); + fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); +} + +/* Both the HP and GNU assemblers under HP-UX provide a .comm directive + that doesn't allow the alignment of global common storage to be directly + specified. 
The SOM linker aligns common storage based on the rounded + value of the NUM_BYTES parameter in the .comm directive. It's not + possible to use the .align directive as it doesn't affect the alignment + of the label associated with a .comm directive. */ + +void +pa_asm_output_aligned_common (FILE *stream, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + unsigned int max_common_align; + + max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64); + if (align > max_common_align) + { + warning (0, "alignment (%u) for %s exceeds maximum alignment " + "for global common data. Using %u", + align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT); + align = max_common_align; + } + + switch_to_section (bss_section); + + assemble_name (stream, name); + fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n", + MAX (size, align / BITS_PER_UNIT)); +} + +/* We can't use .comm for local common storage as the SOM linker effectively + treats the symbol as universal and uses the same storage for local symbols + with the same name in different object files. The .block directive + reserves an uninitialized block of storage. However, it's not common + storage. Fortunately, GCC never requests common storage with the same + name in any given translation unit. */ + +void +pa_asm_output_aligned_local (FILE *stream, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + switch_to_section (bss_section); + fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); + +#ifdef LOCAL_ASM_OP + fprintf (stream, "%s", LOCAL_ASM_OP); + assemble_name (stream, name); + fprintf (stream, "\n"); +#endif + + ASM_OUTPUT_LABEL (stream, name); + fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); +} + +/* Returns 1 if the 6 operands specified in OPERANDS are suitable for + use in fmpysub instructions. */ +int +fmpysuboperands (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + + /* Must be a floating point mode. */ + if (mode != SFmode && mode != DFmode) + return 0; + + /* All modes must be the same. */ + if (! (mode == GET_MODE (operands[1]) + && mode == GET_MODE (operands[2]) + && mode == GET_MODE (operands[3]) + && mode == GET_MODE (operands[4]) + && mode == GET_MODE (operands[5]))) + return 0; + + /* All operands must be registers. */ + if (! (GET_CODE (operands[1]) == REG + && GET_CODE (operands[2]) == REG + && GET_CODE (operands[3]) == REG + && GET_CODE (operands[4]) == REG + && GET_CODE (operands[5]) == REG)) + return 0; + + /* Only 2 real operands to the subtraction. Subtraction is not a commutative + operation, so operands[4] must be the same as operand[3]. */ + if (! rtx_equal_p (operands[3], operands[4])) + return 0; + + /* multiply cannot feed into subtraction. */ + if (rtx_equal_p (operands[5], operands[0])) + return 0; + + /* Inout operand of sub cannot conflict with any operands from multiply. */ + if (rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2])) + return 0; + + /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ + if (mode == SFmode + && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS + || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) + return 0; + + /* Passed. 
Operands are suitable for fmpysub. */ + return 1; +} + +/* Return 1 if the given constant is 2, 4, or 8. These are the valid + constants for shadd instructions. */ +int +shadd_constant_p (int val) +{ + if (val == 2 || val == 4 || val == 8) + return 1; + else + return 0; +} + +/* Return 1 if OP is valid as a base or index register in a + REG+REG address. */ + +int +borx_reg_operand (rtx op, enum machine_mode mode) +{ + if (GET_CODE (op) != REG) + return 0; + + /* We must reject virtual registers as the only expressions that + can be instantiated are REG and REG+CONST. */ + if (op == virtual_incoming_args_rtx + || op == virtual_stack_vars_rtx + || op == virtual_stack_dynamic_rtx + || op == virtual_outgoing_args_rtx + || op == virtual_cfa_rtx) + return 0; + + /* While it's always safe to index off the frame pointer, it's not + profitable to do so when the frame pointer is being eliminated. */ + if (!reload_completed + && flag_omit_frame_pointer + && !cfun->calls_alloca + && op == frame_pointer_rtx) + return 0; + + return register_operand (op, mode); +} + +/* Return 1 if this operand is anything other than a hard register. */ + +int +non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER); +} + +/* Return TRUE if INSN branches forward. */ + +static bool +forward_branch_p (rtx insn) +{ + rtx lab = JUMP_LABEL (insn); + + /* The INSN must have a jump label. */ + gcc_assert (lab != NULL_RTX); + + if (INSN_ADDRESSES_SET_P ()) + return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn)); + + while (insn) + { + if (insn == lab) + return true; + else + insn = NEXT_INSN (insn); + } + + return false; +} + +/* Return 1 if OP is an equality comparison, else return 0. */ +int +eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return (GET_CODE (op) == EQ || GET_CODE (op) == NE); +} + +/* Return 1 if INSN is in the delay slot of a call instruction. */ +int +jump_in_call_delay (rtx insn) +{ + + if (GET_CODE (insn) != JUMP_INSN) + return 0; + + if (PREV_INSN (insn) + && PREV_INSN (PREV_INSN (insn)) + && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN) + { + rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn))); + + return (GET_CODE (PATTERN (test_insn)) == SEQUENCE + && XVECEXP (PATTERN (test_insn), 0, 1) == insn); + + } + else + return 0; +} + +/* Output an unconditional move and branch insn. */ + +const char * +output_parallel_movb (rtx *operands, rtx insn) +{ + int length = get_attr_length (insn); + + /* These are the cases in which we win. */ + if (length == 4) + return "mov%I1b,tr %1,%0,%2"; + + /* None of the following cases win, but they don't lose either. */ + if (length == 8) + { + if (dbr_sequence_length () == 0) + { + /* Nothing in the delay slot, fake it by putting the combined + insn (the copy or add) in the delay slot of a bl. */ + if (GET_CODE (operands[1]) == CONST_INT) + return "b %2\n\tldi %1,%0"; + else + return "b %2\n\tcopy %1,%0"; + } + else + { + /* Something in the delay slot, but we've got a long branch. */ + if (GET_CODE (operands[1]) == CONST_INT) + return "ldi %1,%0\n\tb %2"; + else + return "copy %1,%0\n\tb %2"; + } + } + + if (GET_CODE (operands[1]) == CONST_INT) + output_asm_insn ("ldi %1,%0", operands); + else + output_asm_insn ("copy %1,%0", operands); + return output_lbranch (operands[2], insn, 1); +} + +/* Output an unconditional add and branch insn. 
*/ + +const char * +output_parallel_addb (rtx *operands, rtx insn) +{ + int length = get_attr_length (insn); + + /* To make life easy we want operand0 to be the shared input/output + operand and operand1 to be the readonly operand. */ + if (operands[0] == operands[1]) + operands[1] = operands[2]; + + /* These are the cases in which we win. */ + if (length == 4) + return "add%I1b,tr %1,%0,%3"; + + /* None of the following cases win, but they don't lose either. */ + if (length == 8) + { + if (dbr_sequence_length () == 0) + /* Nothing in the delay slot, fake it by putting the combined + insn (the copy or add) in the delay slot of a bl. */ + return "b %3\n\tadd%I1 %1,%0,%0"; + else + /* Something in the delay slot, but we've got a long branch. */ + return "add%I1 %1,%0,%0\n\tb %3"; + } + + output_asm_insn ("add%I1 %1,%0,%0", operands); + return output_lbranch (operands[3], insn, 1); +} + +/* Return nonzero if INSN (a jump insn) immediately follows a call + to a named function. This is used to avoid filling the delay slot + of the jump since it can usually be eliminated by modifying RP in + the delay slot of the call. */ + +int +following_call (rtx insn) +{ + if (! TARGET_JUMP_IN_DELAY) + return 0; + + /* Find the previous real insn, skipping NOTEs. */ + insn = PREV_INSN (insn); + while (insn && GET_CODE (insn) == NOTE) + insn = PREV_INSN (insn); + + /* Check for CALL_INSNs and millicode calls. */ + if (insn + && ((GET_CODE (insn) == CALL_INSN + && get_attr_type (insn) != TYPE_DYNCALL) + || (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) != SEQUENCE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER + && get_attr_type (insn) == TYPE_MILLI))) + return 1; + + return 0; +} + +/* We use this hook to perform a PA specific optimization which is difficult + to do in earlier passes. + + We want the delay slots of branches within jump tables to be filled. + None of the compiler passes at the moment even has the notion that a + PA jump table doesn't contain addresses, but instead contains actual + instructions! + + Because we actually jump into the table, the addresses of each entry + must stay constant in relation to the beginning of the table (which + itself must stay constant relative to the instruction to jump into + it). I don't believe we can guarantee earlier passes of the compiler + will adhere to those rules. + + So, late in the compilation process we find all the jump tables, and + expand them into real code -- e.g. each entry in the jump table vector + will get an appropriate label followed by a jump to the final target. + + Reorg and the final jump pass can then optimize these branches and + fill their delay slots. We end up with smaller, more efficient code. + + The jump instructions within the table are special; we must be able + to identify them during assembly output (if the jumps don't get filled + we need to emit a nop rather than nullifying the delay slot)). We + identify jumps in switch tables by using insns with the attribute + type TYPE_BTABLE_BRANCH. + + We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB + insns. This serves two purposes, first it prevents jump.c from + noticing that the last N entries in the table jump to the instruction + immediately after the table and deleting the jumps. Second, those + insns mark where we should emit .begin_brtab and .end_brtab directives + when using GAS (allows for better link time optimizations). 
*/ + +static void +pa_reorg (void) +{ + rtx insn; + + remove_useless_addtr_insns (1); + + if (pa_cpu < PROCESSOR_8000) + pa_combine_instructions (); + + + /* This is fairly cheap, so always run it if optimizing. */ + if (optimize > 0 && !TARGET_BIG_SWITCH) + { + /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pattern, tmp, location, label; + unsigned int length, i; + + /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */ + if (GET_CODE (insn) != JUMP_INSN + || (GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) + continue; + + /* Emit marker for the beginning of the branch table. */ + emit_insn_before (gen_begin_brtab (), insn); + + pattern = PATTERN (insn); + location = PREV_INSN (insn); + length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC); + + for (i = 0; i < length; i++) + { + /* Emit a label before each jump to keep jump.c from + removing this code. */ + tmp = gen_label_rtx (); + LABEL_NUSES (tmp) = 1; + emit_label_after (tmp, location); + location = NEXT_INSN (location); + + if (GET_CODE (pattern) == ADDR_VEC) + label = XEXP (XVECEXP (pattern, 0, i), 0); + else + label = XEXP (XVECEXP (pattern, 1, i), 0); + + tmp = gen_short_jump (label); + + /* Emit the jump itself. */ + tmp = emit_jump_insn_after (tmp, location); + JUMP_LABEL (tmp) = label; + LABEL_NUSES (label)++; + location = NEXT_INSN (location); + + /* Emit a BARRIER after the jump. */ + emit_barrier_after (location); + location = NEXT_INSN (location); + } + + /* Emit marker for the end of the branch table. */ + emit_insn_before (gen_end_brtab (), location); + location = NEXT_INSN (location); + emit_barrier_after (location); + + /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */ + delete_insn (insn); + } + } + else + { + /* Still need brtab marker insns. FIXME: the presence of these + markers disables output of the branch table to readonly memory, + and any alignment directives that might be needed. Possibly, + the begin_brtab insn should be output before the label for the + table. This doesn't matter at the moment since the tables are + always output in the text section. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + /* Find an ADDR_VEC insn. */ + if (GET_CODE (insn) != JUMP_INSN + || (GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) + continue; + + /* Now generate markers for the beginning and end of the + branch table. */ + emit_insn_before (gen_begin_brtab (), insn); + emit_insn_after (gen_end_brtab (), insn); + } + } +} + +/* The PA has a number of odd instructions which can perform multiple + tasks at once. On first generation PA machines (PA1.0 and PA1.1) + it may be profitable to combine two instructions into one instruction + with two outputs. It's not profitable PA2.0 machines because the + two outputs would take two slots in the reorder buffers. + + This routine finds instructions which can be combined and combines + them. We only support some of the potential combinations, and we + only try common ways to find suitable instructions. + + * addb can add two registers or a register and a small integer + and jump to a nearby (+-8k) location. Normally the jump to the + nearby location is conditional on the result of the add, but by + using the "true" condition we can make the jump unconditional. + Thus addb can perform two independent operations in one insn. 
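+
+     As a rough illustration (the register numbers and label below are
+     invented for this sketch, not taken from real compiler output), the
+     two-insn sequence
+
+         add %r5,%r4,%r4
+         b L$0001
+
+     can be replaced by the single instruction
+
+         addb,tr %r5,%r4,L$0001
+
+     provided nothing between the two insns uses %r4 or sets %r4 or %r5,
+     which is the check pa_can_combine_p performs below;
+     output_parallel_addb above emits this "add%I1b,tr" form for the
+     combined pattern.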
+ + * movb is similar to addb in that it can perform a reg->reg + or small immediate->reg copy and jump to a nearby (+-8k location). + + * fmpyadd and fmpysub can perform a FP multiply and either an + FP add or FP sub if the operands of the multiply and add/sub are + independent (there are other minor restrictions). Note both + the fmpy and fadd/fsub can in theory move to better spots according + to data dependencies, but for now we require the fmpy stay at a + fixed location. + + * Many of the memory operations can perform pre & post updates + of index registers. GCC's pre/post increment/decrement addressing + is far too simple to take advantage of all the possibilities. This + pass may not be suitable since those insns may not be independent. + + * comclr can compare two ints or an int and a register, nullify + the following instruction and zero some other register. This + is more difficult to use as it's harder to find an insn which + will generate a comclr than finding something like an unconditional + branch. (conditional moves & long branches create comclr insns). + + * Most arithmetic operations can conditionally skip the next + instruction. They can be viewed as "perform this operation + and conditionally jump to this nearby location" (where nearby + is an insns away). These are difficult to use due to the + branch length restrictions. */ + +static void +pa_combine_instructions (void) +{ + rtx anchor, new_rtx; + + /* This can get expensive since the basic algorithm is on the + order of O(n^2) (or worse). Only do it for -O2 or higher + levels of optimization. */ + if (optimize < 2) + return; + + /* Walk down the list of insns looking for "anchor" insns which + may be combined with "floating" insns. As the name implies, + "anchor" instructions don't move, while "floating" insns may + move around. */ + new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX)); + new_rtx = make_insn_raw (new_rtx); + + for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor)) + { + enum attr_pa_combine_type anchor_attr; + enum attr_pa_combine_type floater_attr; + + /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs. + Also ignore any special USE insns. */ + if ((GET_CODE (anchor) != INSN + && GET_CODE (anchor) != JUMP_INSN + && GET_CODE (anchor) != CALL_INSN) + || GET_CODE (PATTERN (anchor)) == USE + || GET_CODE (PATTERN (anchor)) == CLOBBER + || GET_CODE (PATTERN (anchor)) == ADDR_VEC + || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC) + continue; + + anchor_attr = get_attr_pa_combine_type (anchor); + /* See if anchor is an insn suitable for combination. */ + if (anchor_attr == PA_COMBINE_TYPE_FMPY + || anchor_attr == PA_COMBINE_TYPE_FADDSUB + || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH + && ! forward_branch_p (anchor))) + { + rtx floater; + + for (floater = PREV_INSN (anchor); + floater; + floater = PREV_INSN (floater)) + { + if (GET_CODE (floater) == NOTE + || (GET_CODE (floater) == INSN + && (GET_CODE (PATTERN (floater)) == USE + || GET_CODE (PATTERN (floater)) == CLOBBER))) + continue; + + /* Anything except a regular INSN will stop our search. */ + if (GET_CODE (floater) != INSN + || GET_CODE (PATTERN (floater)) == ADDR_VEC + || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) + { + floater = NULL_RTX; + break; + } + + /* See if FLOATER is suitable for combination with the + anchor. 
*/ + floater_attr = get_attr_pa_combine_type (floater); + if ((anchor_attr == PA_COMBINE_TYPE_FMPY + && floater_attr == PA_COMBINE_TYPE_FADDSUB) + || (anchor_attr == PA_COMBINE_TYPE_FADDSUB + && floater_attr == PA_COMBINE_TYPE_FMPY)) + { + /* If ANCHOR and FLOATER can be combined, then we're + done with this pass. */ + if (pa_can_combine_p (new_rtx, anchor, floater, 0, + SET_DEST (PATTERN (floater)), + XEXP (SET_SRC (PATTERN (floater)), 0), + XEXP (SET_SRC (PATTERN (floater)), 1))) + break; + } + + else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH + && floater_attr == PA_COMBINE_TYPE_ADDMOVE) + { + if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) + { + if (pa_can_combine_p (new_rtx, anchor, floater, 0, + SET_DEST (PATTERN (floater)), + XEXP (SET_SRC (PATTERN (floater)), 0), + XEXP (SET_SRC (PATTERN (floater)), 1))) + break; + } + else + { + if (pa_can_combine_p (new_rtx, anchor, floater, 0, + SET_DEST (PATTERN (floater)), + SET_SRC (PATTERN (floater)), + SET_SRC (PATTERN (floater)))) + break; + } + } + } + + /* If we didn't find anything on the backwards scan try forwards. */ + if (!floater + && (anchor_attr == PA_COMBINE_TYPE_FMPY + || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) + { + for (floater = anchor; floater; floater = NEXT_INSN (floater)) + { + if (GET_CODE (floater) == NOTE + || (GET_CODE (floater) == INSN + && (GET_CODE (PATTERN (floater)) == USE + || GET_CODE (PATTERN (floater)) == CLOBBER))) + + continue; + + /* Anything except a regular INSN will stop our search. */ + if (GET_CODE (floater) != INSN + || GET_CODE (PATTERN (floater)) == ADDR_VEC + || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) + { + floater = NULL_RTX; + break; + } + + /* See if FLOATER is suitable for combination with the + anchor. */ + floater_attr = get_attr_pa_combine_type (floater); + if ((anchor_attr == PA_COMBINE_TYPE_FMPY + && floater_attr == PA_COMBINE_TYPE_FADDSUB) + || (anchor_attr == PA_COMBINE_TYPE_FADDSUB + && floater_attr == PA_COMBINE_TYPE_FMPY)) + { + /* If ANCHOR and FLOATER can be combined, then we're + done with this pass. */ + if (pa_can_combine_p (new_rtx, anchor, floater, 1, + SET_DEST (PATTERN (floater)), + XEXP (SET_SRC (PATTERN (floater)), + 0), + XEXP (SET_SRC (PATTERN (floater)), + 1))) + break; + } + } + } + + /* FLOATER will be nonzero if we found a suitable floating + insn for combination with ANCHOR. */ + if (floater + && (anchor_attr == PA_COMBINE_TYPE_FADDSUB + || anchor_attr == PA_COMBINE_TYPE_FMPY)) + { + /* Emit the new instruction and delete the old anchor. */ + emit_insn_before (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, PATTERN (anchor), + PATTERN (floater))), + anchor); + + SET_INSN_DELETED (anchor); + + /* Emit a special USE insn for FLOATER, then delete + the floating insn. */ + emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); + delete_insn (floater); + + continue; + } + else if (floater + && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) + { + rtx temp; + /* Emit the new_jump instruction and delete the old anchor. */ + temp + = emit_jump_insn_before (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, PATTERN (anchor), + PATTERN (floater))), + anchor); + + JUMP_LABEL (temp) = JUMP_LABEL (anchor); + SET_INSN_DELETED (anchor); + + /* Emit a special USE insn for FLOATER, then delete + the floating insn. 
*/ + emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); + delete_insn (floater); + continue; + } + } + } +} + +static int +pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest, + rtx src1, rtx src2) +{ + int insn_code_number; + rtx start, end; + + /* Create a PARALLEL with the patterns of ANCHOR and + FLOATER, try to recognize it, then test constraints + for the resulting pattern. + + If the pattern doesn't match or the constraints + aren't met keep searching for a suitable floater + insn. */ + XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor); + XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater); + INSN_CODE (new_rtx) = -1; + insn_code_number = recog_memoized (new_rtx); + if (insn_code_number < 0 + || (extract_insn (new_rtx), ! constrain_operands (1))) + return 0; + + if (reversed) + { + start = anchor; + end = floater; + } + else + { + start = floater; + end = anchor; + } + + /* There's up to three operands to consider. One + output and two inputs. + + The output must not be used between FLOATER & ANCHOR + exclusive. The inputs must not be set between + FLOATER and ANCHOR exclusive. */ + + if (reg_used_between_p (dest, start, end)) + return 0; + + if (reg_set_between_p (src1, start, end)) + return 0; + + if (reg_set_between_p (src2, start, end)) + return 0; + + /* If we get here, then everything is good. */ + return 1; +} + +/* Return nonzero if references for INSN are delayed. + + Millicode insns are actually function calls with some special + constraints on arguments and register usage. + + Millicode calls always expect their arguments in the integer argument + registers, and always return their result in %r29 (ret1). They + are expected to clobber their arguments, %r1, %r29, and the return + pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else. + + This function tells reorg that the references to arguments and + millicode calls do not appear to happen until after the millicode call. + This allows reorg to put insns which set the argument registers into the + delay slot of the millicode call -- thus they act more like traditional + CALL_INSNs. + + Note we cannot consider side effects of the insn to be delayed because + the branch and link insn will clobber the return pointer. If we happened + to use the return pointer in the delay slot of the call, then we lose. + + get_attr_type will try to recognize the given insn, so make sure to + filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns + in particular. */ +int +insn_refs_are_delayed (rtx insn) +{ + return ((GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) != SEQUENCE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER + && get_attr_type (insn) == TYPE_MILLI)); +} + +/* Promote the return value, but not the arguments. */ + +static enum machine_mode +pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return) +{ + if (for_return == 0) + return mode; + return promote_mode (type, mode, punsignedp); +} + +/* On the HP-PA the value is found in register(s) 28(-29), unless + the mode is SF or DF. Then the value is returned in fr4 (32). + + This must perform the same promotions as PROMOTE_MODE, else promoting + return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly. + + Small structures must be returned in a PARALLEL on PA64 in order + to match the HP Compiler ABI. 
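+
+   As a rough sketch of the PA64 case implemented below (the 12-byte size
+   is just an example chosen for illustration), an aggregate return value
+   of 12 bytes would be described by a PARALLEL of roughly the form
+
+       (parallel [(expr_list (reg:DI 28) (const_int 0))
+                  (expr_list (reg:DI 29) (const_int 8))])
+
+   i.e. left justified in GRs 28 and 29, while aggregates larger than
+   16 bytes are returned in memory (see pa_return_in_memory).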
*/ + +static rtx +pa_function_value (const_tree valtype, + const_tree func ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode valmode; + + if (AGGREGATE_TYPE_P (valtype) + || TREE_CODE (valtype) == COMPLEX_TYPE + || TREE_CODE (valtype) == VECTOR_TYPE) + { + if (TARGET_64BIT) + { + /* Aggregates with a size less than or equal to 128 bits are + returned in GR 28(-29). They are left justified. The pad + bits are undefined. Larger aggregates are returned in + memory. */ + rtx loc[2]; + int i, offset = 0; + int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2; + + for (i = 0; i < ub; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, 28 + i), + GEN_INT (offset)); + offset += 8; + } + + return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); + } + else if (int_size_in_bytes (valtype) > UNITS_PER_WORD) + { + /* Aggregates 5 to 8 bytes in size are returned in general + registers r28-r29 in the same manner as other non + floating-point objects. The data is right-justified and + zero-extended to 64 bits. This is opposite to the normal + justification used on big endian targets and requires + special treatment. */ + rtx loc = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, 28), const0_rtx); + return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc)); + } + } + + if ((INTEGRAL_TYPE_P (valtype) + && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD) + || POINTER_TYPE_P (valtype)) + valmode = word_mode; + else + valmode = TYPE_MODE (valtype); + + if (TREE_CODE (valtype) == REAL_TYPE + && !AGGREGATE_TYPE_P (valtype) + && TYPE_MODE (valtype) != TFmode + && !TARGET_SOFT_FLOAT) + return gen_rtx_REG (valmode, 32); + + return gen_rtx_REG (valmode, 28); +} + +/* Implement the TARGET_LIBCALL_VALUE hook. */ + +static rtx +pa_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + if (! TARGET_SOFT_FLOAT + && (mode == SFmode || mode == DFmode)) + return gen_rtx_REG (mode, 32); + else + return gen_rtx_REG (mode, 28); +} + +/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */ + +static bool +pa_function_value_regno_p (const unsigned int regno) +{ + if (regno == 28 + || (! TARGET_SOFT_FLOAT && regno == 32)) + return true; + + return false; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +static void +pa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int arg_size = FUNCTION_ARG_SIZE (mode, type); + + cum->nargs_prototype--; + cum->words += (arg_size + + ((cum->words & 01) + && type != NULL_TREE + && arg_size > 1)); +} + +/* Return the location of a parameter that is passed in a register or NULL + if the parameter has any component that is passed in memory. + + This is new code and will be pushed to into the net sources after + further testing. + + ??? We might want to restructure this so that it looks more like other + ports. */ +static rtx +pa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int max_arg_words = (TARGET_64BIT ? 8 : 4); + int alignment = 0; + int arg_size; + int fpr_reg_base; + int gpr_reg_base; + rtx retval; + + if (mode == VOIDmode) + return NULL_RTX; + + arg_size = FUNCTION_ARG_SIZE (mode, type); + + /* If this arg would be passed partially or totally on the stack, then + this routine should return zero. 
pa_arg_partial_bytes will + handle arguments which are split between regs and stack slots if + the ABI mandates split arguments. */ + if (!TARGET_64BIT) + { + /* The 32-bit ABI does not split arguments. */ + if (cum->words + arg_size > max_arg_words) + return NULL_RTX; + } + else + { + if (arg_size > 1) + alignment = cum->words & 1; + if (cum->words + alignment >= max_arg_words) + return NULL_RTX; + } + + /* The 32bit ABIs and the 64bit ABIs are rather different, + particularly in their handling of FP registers. We might + be able to cleverly share code between them, but I'm not + going to bother in the hope that splitting them up results + in code that is more easily understood. */ + + if (TARGET_64BIT) + { + /* Advance the base registers to their current locations. + + Remember, gprs grow towards smaller register numbers while + fprs grow to higher register numbers. Also remember that + although FP regs are 32-bit addressable, we pretend that + the registers are 64-bits wide. */ + gpr_reg_base = 26 - cum->words; + fpr_reg_base = 32 + cum->words; + + /* Arguments wider than one word and small aggregates need special + treatment. */ + if (arg_size > 1 + || mode == BLKmode + || (type && (AGGREGATE_TYPE_P (type) + || TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE))) + { + /* Double-extended precision (80-bit), quad-precision (128-bit) + and aggregates including complex numbers are aligned on + 128-bit boundaries. The first eight 64-bit argument slots + are associated one-to-one, with general registers r26 + through r19, and also with floating-point registers fr4 + through fr11. Arguments larger than one word are always + passed in general registers. + + Using a PARALLEL with a word mode register results in left + justified data on a big-endian target. */ + + rtx loc[8]; + int i, offset = 0, ub = arg_size; + + /* Align the base register. */ + gpr_reg_base -= alignment; + + ub = MIN (ub, max_arg_words - cum->words - alignment); + for (i = 0; i < ub; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, gpr_reg_base), + GEN_INT (offset)); + gpr_reg_base -= 1; + offset += 8; + } + + return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc)); + } + } + else + { + /* If the argument is larger than a word, then we know precisely + which registers we must use. */ + if (arg_size > 1) + { + if (cum->words) + { + gpr_reg_base = 23; + fpr_reg_base = 38; + } + else + { + gpr_reg_base = 25; + fpr_reg_base = 34; + } + + /* Structures 5 to 8 bytes in size are passed in the general + registers in the same manner as other non floating-point + objects. The data is right-justified and zero-extended + to 64 bits. This is opposite to the normal justification + used on big endian targets and requires special treatment. + We now define BLOCK_REG_PADDING to pad these objects. + Aggregates, complex and vector types are passed in the same + manner as structures. */ + if (mode == BLKmode + || (type && (AGGREGATE_TYPE_P (type) + || TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE))) + { + rtx loc = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, gpr_reg_base), + const0_rtx); + return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc)); + } + } + else + { + /* We have a single word (32 bits). A simple computation + will get us the register #s we need. */ + gpr_reg_base = 26 - cum->words; + fpr_reg_base = 32 + 2 * cum->words; + } + } + + /* Determine if the argument needs to be passed in both general and + floating point registers. 
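+   Roughly, this is needed for outgoing floating-point arguments to
+   unprototyped functions, and for outgoing indirect calls on the
+   32-bit port when the HP assembler is used; the precise conditions
+   are spelled out in the test below.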
*/ + if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32) + /* If we are doing soft-float with portable runtime, then there + is no need to worry about FP regs. */ + && !TARGET_SOFT_FLOAT + /* The parameter must be some kind of scalar float, else we just + pass it in integer registers. */ + && GET_MODE_CLASS (mode) == MODE_FLOAT + /* The target function must not have a prototype. */ + && cum->nargs_prototype <= 0 + /* libcalls do not need to pass items in both FP and general + registers. */ + && type != NULL_TREE + /* All this hair applies to "outgoing" args only. This includes + sibcall arguments setup with FUNCTION_INCOMING_ARG. */ + && !cum->incoming) + /* Also pass outgoing floating arguments in both registers in indirect + calls with the 32 bit ABI and the HP assembler since there is no + way to the specify argument locations in static functions. */ + || (!TARGET_64BIT + && !TARGET_GAS + && !cum->incoming + && cum->indirect + && GET_MODE_CLASS (mode) == MODE_FLOAT)) + { + retval + = gen_rtx_PARALLEL + (mode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, fpr_reg_base), + const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, gpr_reg_base), + const0_rtx))); + } + else + { + /* See if we should pass this parameter in a general register. */ + if (TARGET_SOFT_FLOAT + /* Indirect calls in the normal 32bit ABI require all arguments + to be passed in general registers. */ + || (!TARGET_PORTABLE_RUNTIME + && !TARGET_64BIT + && !TARGET_ELF32 + && cum->indirect) + /* If the parameter is not a scalar floating-point parameter, + then it belongs in GPRs. */ + || GET_MODE_CLASS (mode) != MODE_FLOAT + /* Structure with single SFmode field belongs in GPR. */ + || (type && AGGREGATE_TYPE_P (type))) + retval = gen_rtx_REG (mode, gpr_reg_base); + else + retval = gen_rtx_REG (mode, fpr_reg_base); + } + return retval; +} + +/* Arguments larger than one word are double word aligned. */ + +static unsigned int +pa_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + bool singleword = (type + ? (integer_zerop (TYPE_SIZE (type)) + || !TREE_CONSTANT (TYPE_SIZE (type)) + || int_size_in_bytes (type) <= UNITS_PER_WORD) + : GET_MODE_SIZE (mode) <= UNITS_PER_WORD); + + return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY; +} + +/* If this arg would be passed totally in registers or totally on the stack, + then this routine should return zero. */ + +static int +pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + unsigned int max_arg_words = 8; + unsigned int offset = 0; + + if (!TARGET_64BIT) + return 0; + + if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1)) + offset = 1; + + if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words) + /* Arg fits fully into registers. */ + return 0; + else if (cum->words + offset >= max_arg_words) + /* Arg fully on the stack. */ + return 0; + else + /* Arg is split. */ + return (max_arg_words - cum->words - offset) * UNITS_PER_WORD; +} + + +/* A get_unnamed_section callback for switching to the text section. + + This function is only used with SOM. Because we don't support + named subspaces, we can only create a new subspace or switch back + to the default text subspace. 
*/ + +static void +som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED) +{ + gcc_assert (TARGET_SOM); + if (TARGET_GAS) + { + if (cfun && cfun->machine && !cfun->machine->in_nsubspa) + { + /* We only want to emit a .nsubspa directive once at the + start of the function. */ + cfun->machine->in_nsubspa = 1; + + /* Create a new subspace for the text. This provides + better stub placement and one-only functions. */ + if (cfun->decl + && DECL_ONE_ONLY (cfun->decl) + && !DECL_WEAK (cfun->decl)) + { + output_section_asm_op ("\t.SPACE $TEXT$\n" + "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8," + "ACCESS=44,SORT=24,COMDAT"); + return; + } + } + else + { + /* There isn't a current function or the body of the current + function has been completed. So, we are changing to the + text section to output debugging information. Thus, we + need to forget that we are in the text section so that + varasm.c will call us when text_section is selected again. */ + gcc_assert (!cfun || !cfun->machine + || cfun->machine->in_nsubspa == 2); + in_section = NULL; + } + output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$"); + return; + } + output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$"); +} + +/* A get_unnamed_section callback for switching to comdat data + sections. This function is only used with SOM. */ + +static void +som_output_comdat_data_section_asm_op (const void *data) +{ + in_section = NULL; + output_section_asm_op (data); +} + +/* Implement TARGET_ASM_INITIALIZE_SECTIONS */ + +static void +pa_som_asm_init_sections (void) +{ + text_section + = get_unnamed_section (0, som_output_text_section_asm_op, NULL); + + /* SOM puts readonly data in the default $LIT$ subspace when PIC code + is not being generated. */ + som_readonly_data_section + = get_unnamed_section (0, output_section_asm_op, + "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$"); + + /* When secondary definitions are not supported, SOM makes readonly + data one-only by creating a new $LIT$ subspace in $TEXT$ with + the comdat flag. */ + som_one_only_readonly_data_section + = get_unnamed_section (0, som_output_comdat_data_section_asm_op, + "\t.SPACE $TEXT$\n" + "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8," + "ACCESS=0x2c,SORT=16,COMDAT"); + + + /* When secondary definitions are not supported, SOM makes data one-only + by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */ + som_one_only_data_section + = get_unnamed_section (SECTION_WRITE, + som_output_comdat_data_section_asm_op, + "\t.SPACE $PRIVATE$\n" + "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8," + "ACCESS=31,SORT=24,COMDAT"); + + /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups + which reference data within the $TEXT$ space (for example constant + strings in the $LIT$ subspace). + + The assemblers (GAS and HP as) both have problems with handling + the difference of two symbols which is the other correct way to + reference constant data during PIC code generation. + + So, there's no way to reference constant data which is in the + $TEXT$ space during PIC generation. Instead place all constant + data into the $PRIVATE$ subspace (this reduces sharing, but it + works correctly). */ + readonly_data_section = flag_pic ? data_section : som_readonly_data_section; + + /* We must not have a reference to an external symbol defined in a + shared library in a readonly section, else the SOM linker will + complain. + + So, we force exception information into the data section. 
*/ + exception_section = data_section; +} + +/* On hpux10, the linker will give an error if we have a reference + in the read-only data section to a symbol defined in a shared + library. Therefore, expressions that might require a reloc can + not be placed in the read-only data section. */ + +static section * +pa_select_section (tree exp, int reloc, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + if (TREE_CODE (exp) == VAR_DECL + && TREE_READONLY (exp) + && !TREE_THIS_VOLATILE (exp) + && DECL_INITIAL (exp) + && (DECL_INITIAL (exp) == error_mark_node + || TREE_CONSTANT (DECL_INITIAL (exp))) + && !reloc) + { + if (TARGET_SOM + && DECL_ONE_ONLY (exp) + && !DECL_WEAK (exp)) + return som_one_only_readonly_data_section; + else + return readonly_data_section; + } + else if (CONSTANT_CLASS_P (exp) && !reloc) + return readonly_data_section; + else if (TARGET_SOM + && TREE_CODE (exp) == VAR_DECL + && DECL_ONE_ONLY (exp) + && !DECL_WEAK (exp)) + return som_one_only_data_section; + else + return data_section; +} + +static void +pa_globalize_label (FILE *stream, const char *name) +{ + /* We only handle DATA objects here, functions are globalized in + ASM_DECLARE_FUNCTION_NAME. */ + if (! FUNCTION_NAME_P (name)) + { + fputs ("\t.EXPORT ", stream); + assemble_name (stream, name); + fputs (",DATA\n", stream); + } +} + +/* Worker function for TARGET_STRUCT_VALUE_RTX. */ + +static rtx +pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM); +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +bool +pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + /* SOM ABI says that objects larger than 64 bits are returned in memory. + PA64 ABI says that objects larger than 128 bits are returned in memory. + Note, int_size_in_bytes can return -1 if the size of the object is + variable or larger than the maximum value that can be expressed as + a HOST_WIDE_INT. It can also return zero for an empty type. The + simplest way to handle variable and empty types is to pass them in + memory. This avoids problems in defining the boundaries of argument + slots, allocating registers, etc. */ + return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8) + || int_size_in_bytes (type) <= 0); +} + +/* Structure to hold declaration and name of external symbols that are + emitted by GCC. We generate a vector of these symbols and output them + at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true. + This avoids putting out names that are never really used. */ + +typedef struct GTY(()) extern_symbol +{ + tree decl; + const char *name; +} extern_symbol; + +/* Define gc'd vector type for extern_symbol. */ +DEF_VEC_O(extern_symbol); +DEF_VEC_ALLOC_O(extern_symbol,gc); + +/* Vector of extern_symbol pointers. */ +static GTY(()) VEC(extern_symbol,gc) *extern_symbols; + +#ifdef ASM_OUTPUT_EXTERNAL_REAL +/* Mark DECL (name NAME) as an external reference (assembler output + file FILE). This saves the names to output at the end of the file + if actually referenced. */ + +void +pa_hpux_asm_output_external (FILE *file, tree decl, const char *name) +{ + extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL); + + gcc_assert (file == asm_out_file); + p->decl = decl; + p->name = name; +} + +/* Output text required at the end of an assembler file. + This includes deferred plabels and .import directives for + all external symbols that were actually referenced. 
*/ + +static void +pa_hpux_file_end (void) +{ + unsigned int i; + extern_symbol *p; + + if (!NO_DEFERRED_PROFILE_COUNTERS) + output_deferred_profile_counters (); + + output_deferred_plabels (); + + for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++) + { + tree decl = p->decl; + + if (!TREE_ASM_WRITTEN (decl) + && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0))) + ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name); + } + + VEC_free (extern_symbol, gc, extern_symbols); +} +#endif + +/* Return true if a change from mode FROM to mode TO for a register + in register class RCLASS is invalid. */ + +bool +pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, + enum reg_class rclass) +{ + if (from == to) + return false; + + /* Reject changes to/from complex and vector modes. */ + if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from) + || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to)) + return true; + + if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)) + return false; + + /* There is no way to load QImode or HImode values directly from + memory. SImode loads to the FP registers are not zero extended. + On the 64-bit target, this conflicts with the definition of + LOAD_EXTEND_OP. Thus, we can't allow changing between modes + with different sizes in the floating-point registers. */ + if (MAYBE_FP_REG_CLASS_P (rclass)) + return true; + + /* HARD_REGNO_MODE_OK places modes with sizes larger than a word + in specific sets of registers. Thus, we cannot allow changing + to a larger mode when it's larger than a word. */ + if (GET_MODE_SIZE (to) > UNITS_PER_WORD + && GET_MODE_SIZE (to) > GET_MODE_SIZE (from)) + return true; + + return false; +} + +/* Returns TRUE if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be FALSE for correct output. + + We should return FALSE for QImode and HImode because these modes + are not ok in the floating-point registers. However, this prevents + tieing these modes to SImode and DImode in the general registers. + So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and + CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used + in the floating-point registers. */ + +bool +pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) +{ + /* Don't tie modes in different classes. */ + if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2)) + return false; + + return true; +} + + +/* Length in units of the trampoline instruction code. */ + +#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40)) + + +/* Output assembler code for a block containing the constant parts + of a trampoline, leaving space for the variable parts.\ + + The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM + and then branches to the specified routine. + + This code template is copied from text segment to stack location + and then patched with pa_trampoline_init to contain valid values, + and then entered as a subroutine. + + It is best to keep this as small as possible to avoid having to + flush multiple lines in the cache. 
*/ + +static void +pa_asm_trampoline_template (FILE *f) +{ + if (!TARGET_64BIT) + { + fputs ("\tldw 36(%r22),%r21\n", f); + fputs ("\tbb,>=,n %r21,30,.+16\n", f); + if (ASSEMBLER_DIALECT == 0) + fputs ("\tdepi 0,31,2,%r21\n", f); + else + fputs ("\tdepwi 0,31,2,%r21\n", f); + fputs ("\tldw 4(%r21),%r19\n", f); + fputs ("\tldw 0(%r21),%r21\n", f); + if (TARGET_PA_20) + { + fputs ("\tbve (%r21)\n", f); + fputs ("\tldw 40(%r22),%r29\n", f); + fputs ("\t.word 0\n", f); + fputs ("\t.word 0\n", f); + } + else + { + fputs ("\tldsid (%r21),%r1\n", f); + fputs ("\tmtsp %r1,%sr0\n", f); + fputs ("\tbe 0(%sr0,%r21)\n", f); + fputs ("\tldw 40(%r22),%r29\n", f); + } + fputs ("\t.word 0\n", f); + fputs ("\t.word 0\n", f); + fputs ("\t.word 0\n", f); + fputs ("\t.word 0\n", f); + } + else + { + fputs ("\t.dword 0\n", f); + fputs ("\t.dword 0\n", f); + fputs ("\t.dword 0\n", f); + fputs ("\t.dword 0\n", f); + fputs ("\tmfia %r31\n", f); + fputs ("\tldd 24(%r31),%r1\n", f); + fputs ("\tldd 24(%r1),%r27\n", f); + fputs ("\tldd 16(%r1),%r1\n", f); + fputs ("\tbve (%r1)\n", f); + fputs ("\tldd 32(%r31),%r31\n", f); + fputs ("\t.dword 0 ; fptr\n", f); + fputs ("\t.dword 0 ; static link\n", f); + } +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. + + Move the function address to the trampoline template at offset 36. + Move the static chain value to trampoline template at offset 40. + Move the trampoline address to trampoline template at offset 44. + Move r19 to trampoline template at offset 48. The latter two + words create a plabel for the indirect call to the trampoline. + + A similar sequence is used for the 64-bit port but the plabel is + at the beginning of the trampoline. + + Finally, the cache entries for the trampoline code are flushed. + This is necessary to ensure that the trampoline instruction sequence + is written to memory prior to any attempts at prefetching the code + sequence. */ + +static void +pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx start_addr = gen_reg_rtx (Pmode); + rtx end_addr = gen_reg_rtx (Pmode); + rtx line_length = gen_reg_rtx (Pmode); + rtx r_tramp, tmp; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + r_tramp = force_reg (Pmode, XEXP (m_tramp, 0)); + + if (!TARGET_64BIT) + { + tmp = adjust_address (m_tramp, Pmode, 36); + emit_move_insn (tmp, fnaddr); + tmp = adjust_address (m_tramp, Pmode, 40); + emit_move_insn (tmp, chain_value); + + /* Create a fat pointer for the trampoline. */ + tmp = adjust_address (m_tramp, Pmode, 44); + emit_move_insn (tmp, r_tramp); + tmp = adjust_address (m_tramp, Pmode, 48); + emit_move_insn (tmp, gen_rtx_REG (Pmode, 19)); + + /* fdc and fic only use registers for the address to flush, + they do not accept integer displacements. We align the + start and end addresses to the beginning of their respective + cache lines to minimize the number of lines flushed. 
*/ + emit_insn (gen_andsi3 (start_addr, r_tramp, + GEN_INT (-MIN_CACHELINE_SIZE))); + tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1)); + emit_insn (gen_andsi3 (end_addr, tmp, + GEN_INT (-MIN_CACHELINE_SIZE))); + emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); + emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length)); + emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length, + gen_reg_rtx (Pmode), + gen_reg_rtx (Pmode))); + } + else + { + tmp = adjust_address (m_tramp, Pmode, 56); + emit_move_insn (tmp, fnaddr); + tmp = adjust_address (m_tramp, Pmode, 64); + emit_move_insn (tmp, chain_value); + + /* Create a fat pointer for the trampoline. */ + tmp = adjust_address (m_tramp, Pmode, 16); + emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32))); + tmp = adjust_address (m_tramp, Pmode, 24); + emit_move_insn (tmp, gen_rtx_REG (Pmode, 27)); + + /* fdc and fic only use registers for the address to flush, + they do not accept integer displacements. We align the + start and end addresses to the beginning of their respective + cache lines to minimize the number of lines flushed. */ + tmp = force_reg (Pmode, plus_constant (r_tramp, 32)); + emit_insn (gen_anddi3 (start_addr, tmp, + GEN_INT (-MIN_CACHELINE_SIZE))); + tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1)); + emit_insn (gen_anddi3 (end_addr, tmp, + GEN_INT (-MIN_CACHELINE_SIZE))); + emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); + emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length)); + emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length, + gen_reg_rtx (Pmode), + gen_reg_rtx (Pmode))); + } +} + +/* Perform any machine-specific adjustment in the address of the trampoline. + ADDR contains the address that was passed to pa_trampoline_init. + Adjust the trampoline address to point to the plabel at offset 44. */ + +static rtx +pa_trampoline_adjust_address (rtx addr) +{ + if (!TARGET_64BIT) + addr = memory_address (Pmode, plus_constant (addr, 46)); + return addr; +} + +static rtx +pa_delegitimize_address (rtx orig_x) +{ + rtx x = delegitimize_mem_from_attrs (orig_x); + + if (GET_CODE (x) == LO_SUM + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R) + return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0)); + return x; +} + +static rtx +pa_internal_arg_pointer (void) +{ + /* The argument pointer and the hard frame pointer are the same in + the 32-bit runtime, so we don't need a copy. */ + if (TARGET_64BIT) + return copy_to_reg (virtual_incoming_args_rtx); + else + return virtual_incoming_args_rtx; +} + +/* Given FROM and TO register numbers, say whether this elimination is allowed. + Frame pointer elimination is automatically handled. */ + +static bool +pa_can_eliminate (const int from, const int to) +{ + /* The argument cannot be eliminated in the 64-bit runtime. */ + if (TARGET_64BIT && from == ARG_POINTER_REGNUM) + return false; + + return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM + ? ! frame_pointer_needed + : true); +} + +/* Define the offset between two registers, FROM to be eliminated and its + replacement TO, at the start of a routine. 
*/ +HOST_WIDE_INT +pa_initial_elimination_offset (int from, int to) +{ + HOST_WIDE_INT offset; + + if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM) + && to == STACK_POINTER_REGNUM) + offset = -compute_frame_size (get_frame_size (), 0); + else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + offset = 0; + else + gcc_unreachable (); + + return offset; +} + +static void +pa_conditional_register_usage (void) +{ + int i; + + if (!TARGET_64BIT && !TARGET_PA_11) + { + for (i = 56; i <= FP_REG_LAST; i++) + fixed_regs[i] = call_used_regs[i] = 1; + for (i = 33; i < 56; i += 2) + fixed_regs[i] = call_used_regs[i] = 1; + } + if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT) + { + for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) + fixed_regs[i] = call_used_regs[i] = 1; + } + if (flag_pic) + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; +} + +/* Target hook for c_mode_for_suffix. */ + +static enum machine_mode +pa_c_mode_for_suffix (char suffix) +{ + if (HPUX_LONG_DOUBLE_LIBRARY) + { + if (suffix == 'q') + return TFmode; + } + + return VOIDmode; +} + +/* Target hook for function_section. */ + +static section * +pa_function_section (tree decl, enum node_frequency freq, + bool startup, bool exit) +{ + /* Put functions in text section if target doesn't have named sections. */ + if (!targetm.have_named_sections) + return text_section; + + /* Force nested functions into the same section as the containing + function. */ + if (decl + && DECL_SECTION_NAME (decl) == NULL_TREE + && DECL_CONTEXT (decl) != NULL_TREE + && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL + && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE) + return function_section (DECL_CONTEXT (decl)); + + /* Otherwise, use the default function section. */ + return default_function_section (decl, freq, startup, exit); +} + +/* Implement TARGET_SECTION_TYPE_FLAGS. */ + +static unsigned int +pa_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags; + + flags = default_section_type_flags (decl, name, reloc); + + /* Function labels are placed in the constant pool. This can + cause a section conflict if decls are put in ".data.rel.ro" + or ".data.rel.ro.local" using the __attribute__ construct. */ + if (strcmp (name, ".data.rel.ro") == 0 + || strcmp (name, ".data.rel.ro.local") == 0) + flags |= SECTION_WRITE | SECTION_RELRO; + + return flags; +} + +#include "gt-pa.h" diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h new file mode 100644 index 000000000..f086daa36 --- /dev/null +++ b/gcc/config/pa/pa.h @@ -0,0 +1,1572 @@ +/* Definitions of target machine for GNU compiler, for the HP Spectrum. + Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Michael Tiemann (tiemann@cygnus.com) of Cygnus Support + and Tim Moore (moore@defmacro.cs.utah.edu) of the Center for + Software Science at the University of Utah. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* For long call handling. */ +extern unsigned long total_code_bytes; + +/* Which processor to schedule for. */ + +enum processor_type +{ + PROCESSOR_700, + PROCESSOR_7100, + PROCESSOR_7100LC, + PROCESSOR_7200, + PROCESSOR_7300, + PROCESSOR_8000 +}; + +/* For -mschedule= option. */ +extern enum processor_type pa_cpu; + +/* For -munix= option. */ +extern int flag_pa_unix; + +#define pa_cpu_attr ((enum attr_cpu)pa_cpu) + +/* Print subsidiary information on the compiler version in use. */ + +#define TARGET_VERSION fputs (" (hppa)", stderr); + +#define TARGET_PA_10 (!TARGET_PA_11 && !TARGET_PA_20) + +/* Generate code for the HPPA 2.0 architecture in 64bit mode. */ +#ifndef TARGET_64BIT +#define TARGET_64BIT 0 +#endif + +/* Generate code for ELF32 ABI. */ +#ifndef TARGET_ELF32 +#define TARGET_ELF32 0 +#endif + +/* Generate code for SOM 32bit ABI. */ +#ifndef TARGET_SOM +#define TARGET_SOM 0 +#endif + +/* HP-UX UNIX features. */ +#ifndef TARGET_HPUX +#define TARGET_HPUX 0 +#endif + +/* HP-UX 10.10 UNIX 95 features. */ +#ifndef TARGET_HPUX_10_10 +#define TARGET_HPUX_10_10 0 +#endif + +/* HP-UX 11.* features (11.00, 11.11, 11.23, etc.) */ +#ifndef TARGET_HPUX_11 +#define TARGET_HPUX_11 0 +#endif + +/* HP-UX 11i multibyte and UNIX 98 extensions. */ +#ifndef TARGET_HPUX_11_11 +#define TARGET_HPUX_11_11 0 +#endif + +/* HP-UX long double library. */ +#ifndef HPUX_LONG_DOUBLE_LIBRARY +#define HPUX_LONG_DOUBLE_LIBRARY 0 +#endif + +/* The following three defines are potential target switches. The current + defines are optimal given the current capabilities of GAS and GNU ld. */ + +/* Define to a C expression evaluating to true to use long absolute calls. + Currently, only the HP assembler and SOM linker support long absolute + calls. They are used only in non-pic code. */ +#define TARGET_LONG_ABS_CALL (TARGET_SOM && !TARGET_GAS) + +/* Define to a C expression evaluating to true to use long PIC symbol + difference calls. Long PIC symbol difference calls are only used with + the HP assembler and linker. The HP assembler detects this instruction + sequence and treats it as long pc-relative call. Currently, GAS only + allows a difference of two symbols in the same subspace, and it doesn't + detect the sequence as a pc-relative call. */ +#define TARGET_LONG_PIC_SDIFF_CALL (!TARGET_GAS && TARGET_HPUX) + +/* Define to a C expression evaluating to true to use long PIC + pc-relative calls. Long PIC pc-relative calls are only used with + GAS. Currently, they are usable for calls which bind local to a + module but not for external calls. */ +#define TARGET_LONG_PIC_PCREL_CALL 0 + +/* Define to a C expression evaluating to true to use SOM secondary + definition symbols for weak support. Linker support for secondary + definition symbols is buggy prior to HP-UX 11.X. */ +#define TARGET_SOM_SDEF 0 + +/* Define to a C expression evaluating to true to save the entry value + of SP in the current frame marker. This is normally unnecessary. + However, the HP-UX unwind library looks at the SAVE_SP callinfo flag. + HP compilers don't use this flag but it is supported by the assembler. + We set this flag to indicate that register %r3 has been saved at the + start of the frame. Thus, when the HP unwind library is used, we + need to generate additional code to save SP into the frame marker. 
*/ +#define TARGET_HPUX_UNWIND_LIBRARY 0 + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_GAS | MASK_JUMP_IN_DELAY | MASK_BIG_SWITCH) +#endif + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +#ifndef TARGET_SCHED_DEFAULT +#define TARGET_SCHED_DEFAULT PROCESSOR_8000 +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-schedule is ignored if -mschedule is specified. + --with-arch is ignored if -march is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:-march=%(VALUE)}" }, \ + {"schedule", "%{!mschedule=*:-mschedule=%(VALUE)}" } + +/* Specify the dialect of assembler to use. New mnemonics is dialect one + and the old mnemonics are dialect zero. */ +#define ASSEMBLER_DIALECT (TARGET_PA_20 ? 1 : 0) + +/* Override some settings from dbxelf.h. */ + +/* We do not have to be compatible with dbx, so we enable gdb extensions + by default. */ +#define DEFAULT_GDB_EXTENSIONS 1 + +/* This used to be zero (no max length), but big enums and such can + cause huge strings which killed gas. + + We also have to avoid lossage in dbxout.c -- it does not compute the + string size accurately, so we are real conservative here. */ +#undef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 3000 + +/* GDB always assumes the current function's frame begins at the value + of the stack pointer upon entry to the current function. Accessing + local variables and parameters passed on the stack is done using the + base of the frame + an offset provided by GCC. + + For functions which have frame pointers this method works fine; + the (frame pointer) == (stack pointer at function entry) and GCC provides + an offset relative to the frame pointer. + + This loses for functions without a frame pointer; GCC provides an offset + which is relative to the stack pointer after adjusting for the function's + frame size. GDB would prefer the offset to be relative to the value of + the stack pointer at the function's entry. Yuk! */ +#define DEBUGGER_AUTO_OFFSET(X) \ + ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) \ + + (frame_pointer_needed ? 0 : compute_frame_size (get_frame_size (), 0))) + +#define DEBUGGER_ARG_OFFSET(OFFSET, X) \ + ((GET_CODE (X) == PLUS ? OFFSET : 0) \ + + (frame_pointer_needed ? 0 : compute_frame_size (get_frame_size (), 0))) + +#define TARGET_CPU_CPP_BUILTINS() \ +do { \ + builtin_assert("cpu=hppa"); \ + builtin_assert("machine=hppa"); \ + builtin_define("__hppa"); \ + builtin_define("__hppa__"); \ + if (TARGET_PA_20) \ + builtin_define("_PA_RISC2_0"); \ + else if (TARGET_PA_11) \ + builtin_define("_PA_RISC1_1"); \ + else \ + builtin_define("_PA_RISC1_0"); \ +} while (0) + +/* An old set of OS defines for various BSD-like systems. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("REVARGV"); \ + builtin_define_std ("hp800"); \ + builtin_define_std ("hp9000"); \ + builtin_define_std ("hp9k8"); \ + if (!c_dialect_cxx () && !flag_iso) \ + builtin_define ("hppa"); \ + builtin_define_std ("spectrum"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=bsd"); \ + builtin_assert ("system=unix"); \ + } \ + while (0) + +#define CC1_SPEC "%{pg:} %{p:}" + +#define LINK_SPEC "%{mlinker-opt:-O} %{!shared:-u main} %{shared:-b}" + +/* We don't want -lg. 
*/ +#ifndef LIB_SPEC +#define LIB_SPEC "%{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}" +#endif + +/* Make gcc agree with <machine/ansi.h> */ + +#define SIZE_TYPE "unsigned int" +#define PTRDIFF_TYPE "int" +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* target machine storage layout */ +typedef struct GTY(()) machine_function +{ + /* Flag indicating that a .NSUBSPA directive has been output for + this function. */ + int in_nsubspa; +} machine_function; + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + (MODE) = word_mode; + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 1 + +/* Define this if most significant byte of a word is the lowest numbered. */ +/* That is true on the HP-PA. */ +#define BYTES_BIG_ENDIAN 1 + +/* Define this if most significant word of a multiword number is lowest + numbered. */ +#define WORDS_BIG_ENDIAN 1 + +#define MAX_BITS_PER_WORD 64 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4) + +/* Minimum number of units in a word. If this is undefined, the default + is UNITS_PER_WORD. Otherwise, it is the constant value that is the + smallest value that UNITS_PER_WORD can have at run-time. + + FIXME: This needs to be 4 when TARGET_64BIT is true to suppress the + building of various TImode routines in libgcc. The HP runtime + specification doesn't provide the alignment requirements and calling + conventions for TImode variables. */ +#define MIN_UNITS_PER_WORD 4 + +/* The widest floating point format supported by the hardware. Note that + setting this influences some Ada floating point type sizes, currently + required for GNAT to operate properly. */ +#define WIDEST_HARDWARE_FP_SIZE 64 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY BITS_PER_WORD + +/* Largest alignment required for any stack parameter, in bits. + Don't define this if it is equal to PARM_BOUNDARY */ +#define MAX_PARM_BOUNDARY BIGGEST_ALIGNMENT + +/* Boundary (in *bits*) on which stack pointer is always aligned; + certain optimizations in combine depend on this. + + The HP-UX runtime documents mandate 64-byte and 16-byte alignment for + the stack on the 32 and 64-bit ports, respectively. However, we + are only guaranteed that the stack is aligned to BIGGEST_ALIGNMENT + in main. Thus, we treat the former as the preferred alignment. */ +#define STACK_BOUNDARY BIGGEST_ALIGNMENT +#define PREFERRED_STACK_BOUNDARY (TARGET_64BIT ? 128 : 512) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY BITS_PER_WORD + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. 
*/ +#define BIGGEST_ALIGNMENT (2 * BITS_PER_WORD) + +/* Get around hp-ux assembler bug, and make strcpy of constants fast. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + pa_modes_tieable_p (MODE1, MODE2) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* The HP-PA pc isn't overloaded on a register that the compiler knows about. */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 30 + +/* Fixed register for local variable access. Always eliminated. */ +#define FRAME_POINTER_REGNUM (TARGET_64BIT ? 61 : 89) + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 3 + +/* Don't allow hard registers to be renamed into r2 unless r2 + is already live or already being saved (due to eh). */ + +#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ + ((NEW_REG) != 2 || df_regs_ever_live_p (2) || crtl->calls_eh_return) + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM (TARGET_64BIT ? 29 : 3) + +/* Register in which static-chain is passed to a function. */ +#define STATIC_CHAIN_REGNUM (TARGET_64BIT ? 31 : 29) + +/* Register used to address the offset table for position-independent + data references. */ +#define PIC_OFFSET_TABLE_REGNUM \ + (flag_pic ? (TARGET_64BIT ? 27 : 19) : INVALID_REGNUM) + +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED 1 + +/* Function to return the rtx used to save the pic offset table register + across function calls. */ +extern struct rtx_def *hppa_pic_save_rtx (void); + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Register in which address to store a structure value + is passed to a function. */ +#define PA_STRUCT_VALUE_REGNUM 28 + +/* Definitions for register eliminations. + + We have two registers that can be eliminated. First, the frame pointer + register can often be eliminated in favor of the stack pointer register. + Secondly, the argument pointer register can always be eliminated in the + 32-bit runtimes. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + The argument pointer cannot be eliminated in the 64-bit runtime. It + is the same register as the hard frame pointer in the 32-bit runtime. + So, it does not need to be listed. 
*/ +#define ELIMINABLE_REGS \ +{{ HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM} } + +/* Define the offset between two registers, one to be eliminated, + and the other its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = pa_initial_elimination_offset(FROM, TO)) + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) \ + ((N) < 3 ? (N) + 20 : (N) == 3 ? 31 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 29) +#define EH_RETURN_HANDLER_RTX pa_eh_return_handler_rtx () + +/* Offset from the frame pointer register value to the top of stack. */ +#define FRAME_POINTER_CFA_OFFSET(FNDECL) 0 + +/* The maximum number of hard registers that can be saved in the call + frame. The soft frame pointer is not included. */ +#define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER - 1) + +/* A C expression whose value is RTL representing the location of the + incoming return address at the beginning of any function, before the + prologue. You only need to define this macro if you want to support + call frame debugging information like that provided by DWARF 2. */ +#define INCOMING_RETURN_ADDR_RTX (gen_rtx_REG (word_mode, 2)) +#define DWARF_FRAME_RETURN_COLUMN (DWARF_FRAME_REGNUM (2)) + +/* A C expression whose value is an integer giving a DWARF 2 column + number that may be used as an alternate return column. This should + be defined only if DWARF_FRAME_RETURN_COLUMN is set to a general + register, but an alternate column needs to be used for signal frames. + + Column 0 is not used but unfortunately its register size is set to + 4 bytes (sizeof CCmode) so it can't be used on 64-bit targets. */ +#define DWARF_ALT_FRAME_RETURN_COLUMN (FIRST_PSEUDO_REGISTER - 1) + +/* This macro chooses the encoding of pointers embedded in the exception + handling sections. If at all possible, this should be defined such + that the exception handling section will not require dynamic relocations, + and so may be read-only. + + Because the HP assembler auto aligns, it is necessary to use + DW_EH_PE_aligned. It's not possible to make the data read-only + on the HP-UX SOM port since the linker requires fixups for label + differences in different sections to be word aligned. However, + the SOM linker can do unaligned fixups for absolute pointers. + We also need aligned pointers for global and function pointers. + + Although the HP-UX 64-bit ELF linker can handle unaligned pc-relative + fixups, the runtime doesn't have a consistent relationship between + text and data for dynamically loaded objects. Thus, it's not possible + to use pc-relative encoding for pointers on this target. It may be + possible to use segment relative encodings but GAS doesn't currently + have a mechanism to generate these encodings. For other targets, we + use pc-relative encoding for pointers. If the pointer might require + dynamic relocation, we make it indirect. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (TARGET_GAS && !TARGET_HPUX \ + ? (DW_EH_PE_pcrel \ + | ((GLOBAL) || (CODE) == 2 ? DW_EH_PE_indirect : 0) \ + | (TARGET_64BIT ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)) \ + : (!TARGET_GAS || (GLOBAL) || (CODE) == 2 \ + ? DW_EH_PE_aligned : DW_EH_PE_absptr)) + +/* Handle special EH pointer encodings. Absolute, pc-relative, and + indirect are handled automatically. 
We output pc-relative, and
+   indirect pc-relative ourselves since we need some special magic to
+   generate pc-relative relocations, and to handle indirect function
+   pointers.  */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+  do {                                                                 \
+    if (((ENCODING) & 0x70) == DW_EH_PE_pcrel)                         \
+      {                                                                \
+       fputs (integer_asm_op (SIZE, FALSE), FILE);                     \
+       if ((ENCODING) & DW_EH_PE_indirect)                             \
+         output_addr_const (FILE, get_deferred_plabel (ADDR));         \
+       else                                                            \
+         assemble_name (FILE, XSTR ((ADDR), 0));                       \
+       fputs ("+8-$PIC_pcrel$0", FILE);                                \
+       goto DONE;                                                      \
+      }                                                                \
+  } while (0)
+
+
+/* The class value for index registers, and the one for base regs.  */
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define FP_REG_CLASS_P(CLASS) \
+  ((CLASS) == FP_REGS || (CLASS) == FPUPPER_REGS)
+
+/* True if register is floating-point.  */
+#define FP_REGNO_P(N) ((N) >= FP_REG_FIRST && (N) <= FP_REG_LAST)
+
+#define MAYBE_FP_REG_CLASS_P(CLASS) \
+  reg_classes_intersect_p ((CLASS), FP_REGS)
+
+
+/* Stack layout; function entry, exit and calling.  */
+
+/* Define this if pushing a word on the stack
+   makes the stack pointer a smaller address.  */
+/* #define STACK_GROWS_DOWNWARD */
+
+/* Believe it or not.  */
+#define ARGS_GROW_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+   is at the high-address end of the local variables;
+   that is, each additional local variable allocated
+   goes at a more negative offset in the frame.  */
+#define FRAME_GROWS_DOWNWARD 0
+
+/* Offset within stack frame to start allocating local variables at.
+   If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+   first local allocated.  Otherwise, it is the offset to the BEGINNING
+   of the first local allocated.
+
+   On the 32-bit ports, we reserve one slot for the previous frame
+   pointer and one fill slot.  The fill slot is for compatibility
+   with HP compiled programs.  On the 64-bit ports, we reserve one
+   slot for the previous frame pointer.  */
+#define STARTING_FRAME_OFFSET 8
+
+/* Define STACK_ALIGNMENT_NEEDED to zero to disable final alignment
+   of the stack.  The default is to align it to STACK_BOUNDARY.  */
+#define STACK_ALIGNMENT_NEEDED 0
+
+/* If we generate an insn to push BYTES bytes,
+   this says how many the stack pointer really advances by.
+   On the HP-PA, don't define this because there are no push insns.  */
+/* #define PUSH_ROUNDING(BYTES) */
+
+/* Offset of first parameter from the argument pointer register value.
+   This value will be negated because the arguments grow down.
+   Also note that on STACK_GROWS_UPWARD machines (such as this one)
+   this is the distance from the frame pointer to the end of the first
+   argument, not its beginning.  To get the real offset of the first
+   argument, the size of the argument must be added.  */
+
+#define FIRST_PARM_OFFSET(FNDECL) (TARGET_64BIT ? -64 : -32)
+
+/* When a parameter is passed in a register, stack space is still
+   allocated for it.  */
+#define REG_PARM_STACK_SPACE(DECL) (TARGET_64BIT ? 64 : 16)
+
+/* Define this if the above stack space is to be considered part of the
+   space allocated by the caller.  */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+/* Keep the stack pointer constant throughout the function.
+   This is both an optimization and a necessity: longjmp
+   doesn't behave itself when the stack pointer moves within
+   the function!
*/
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* The weird HPPA calling conventions require a minimum of 48 bytes on
+   the stack: 16 bytes for register saves, and 32 bytes for magic.
+   This is the difference between the logical top of stack and the
+   actual sp.
+
+   On the 64-bit port, the HP C compiler allocates a 48-byte frame
+   marker, although the runtime documentation only describes a 16
+   byte marker.  For compatibility, we allocate 48 bytes.  */
+#define STACK_POINTER_OFFSET \
+  (TARGET_64BIT ? -(crtl->outgoing_args_size + 48): -32)
+
+#define STACK_DYNAMIC_OFFSET(FNDECL)   \
+  (TARGET_64BIT                        \
+   ? (STACK_POINTER_OFFSET)            \
+   : ((STACK_POINTER_OFFSET) - crtl->outgoing_args_size))
+
+
+/* Define a data type for recording info about an argument list
+   during the scan of that argument list.  This data type should
+   hold all necessary information about the function itself
+   and about the args processed so far, enough to enable macros
+   such as FUNCTION_ARG to determine where the next arg should go.
+
+   On the HP-PA, the WORDS field holds the number of words
+   of arguments scanned so far (including the invisible argument,
+   if any, which holds the structure-value-address).  Thus, 4 or
+   more means all following args should go on the stack.
+
+   The INCOMING field tracks whether this is an "incoming" or
+   "outgoing" argument.
+
+   The INDIRECT field indicates whether this is an indirect
+   call or not.
+
+   The NARGS_PROTOTYPE field indicates that an argument does not
+   have a prototype when it is less than or equal to 0.  */
+
+struct hppa_args {int words, nargs_prototype, incoming, indirect; };
+
+#define CUMULATIVE_ARGS struct hppa_args
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+   for a call to a function whose data type is FNTYPE.
+   For a library call, FNTYPE is 0.  */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+  (CUM).words = 0,                                                      \
+  (CUM).incoming = 0,                                                   \
+  (CUM).indirect = (FNTYPE) && !(FNDECL),                               \
+  (CUM).nargs_prototype = (FNTYPE && prototype_p (FNTYPE)               \
+                           ? (list_length (TYPE_ARG_TYPES (FNTYPE)) - 1 \
+                              + (TYPE_MODE (TREE_TYPE (FNTYPE)) == BLKmode \
+                                 || pa_return_in_memory (TREE_TYPE (FNTYPE), 0))) \
+                           : 0)
+
+
+
+/* Similar, but when scanning the definition of a procedure.  We always
+   set NARGS_PROTOTYPE large so we never return a PARALLEL.  */
+
+#define INIT_CUMULATIVE_INCOMING_ARGS(CUM,FNTYPE,IGNORE) \
+  (CUM).words = 0,                              \
+  (CUM).incoming = 1,                           \
+  (CUM).indirect = 0,                           \
+  (CUM).nargs_prototype = 1000
+
+/* Figure out the size in words of the function argument.  The size
+   returned by this macro should always be greater than zero because
+   we pass variable and zero sized objects by reference.  */
+
+#define FUNCTION_ARG_SIZE(MODE, TYPE)   \
+  ((((MODE) != BLKmode \
+     ? (HOST_WIDE_INT) GET_MODE_SIZE (MODE) \
+     : int_size_in_bytes (TYPE)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Determine where to put an argument to a function.
+   Value is zero to push the argument on the stack,
+   or a hard register in which to store the argument.
+
+   MODE is the argument's machine mode.
+   TYPE is the data type of the argument (as a tree).
+    This is null for libcalls where that information may
+    not be available.
+   CUM is a variable of type CUMULATIVE_ARGS which gives info about
+    the preceding args and about the function being called.
+   NAMED is nonzero if this argument is a named parameter
+    (otherwise it is an extra parameter matching an ellipsis).
+
+   On the HP-PA the first four words of args are normally in registers
+   and the rest are pushed.
But any arg that won't entirely fit in regs
+   is pushed.
+
+   Arguments passed in registers are either 1 or 2 words long.
+
+   The caller must make a distinction between calls to explicitly named
+   functions and calls through pointers to functions -- the conventions
+   are different!  Calls through pointers to functions only use general
+   registers for the first four argument words.
+
+   Of course all this is different for the portable runtime model
+   HP wants everyone to use for ELF.  Ugh.  Here's a quick description
+   of how it's supposed to work.
+
+   1) callee side remains unchanged.  It expects integer args to be
+   in the integer registers, float args in the float registers and
+   unnamed args in integer registers.
+
+   2) caller side now depends on if the function being called has
+   a prototype in scope (rather than if it's being called indirectly).
+
+   2a) If there is a prototype in scope, then arguments are passed
+   according to their type (ints in integer registers, floats in float
+   registers, unnamed args in integer registers).
+
+   2b) If there is no prototype in scope, then floating point arguments
+   are passed in both integer and float registers.  egad.
+
+   FYI: The portable parameter passing conventions are almost exactly like
+   the standard parameter passing conventions on the RS6000.  That's why
+   you'll see lots of similar code in rs6000.h.  */
+
+/* If defined, a C expression which determines whether, and in which
+   direction, to pad out an argument with extra space.  */
+#define FUNCTION_ARG_PADDING(MODE, TYPE) function_arg_padding ((MODE), (TYPE))
+
+/* Specify padding for the last element of a block move between registers
+   and memory.
+
+   The 64-bit runtime specifies that objects need to be left justified
+   (i.e., the normal justification for a big endian target).  The 32-bit
+   runtime specifies right justification for objects smaller than 64 bits.
+   We use a DImode register in the parallel for 5 to 7 byte structures
+   so that there is only one element.  This allows the object to be
+   correctly padded.  */
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+  function_arg_padding ((MODE), (TYPE))
+
+
+/* On HPPA, we emit profiling code as rtl via PROFILE_HOOK rather than
+   as assembly via FUNCTION_PROFILER.  Just output a local label.
+   We can't use the function label because the GAS SOM target can't
+   handle the difference of a global symbol and a local symbol.  */
+
+#ifndef FUNC_BEGIN_PROLOG_LABEL
+#define FUNC_BEGIN_PROLOG_LABEL        "LFBP"
+#endif
+
+#define FUNCTION_PROFILER(FILE, LABEL) \
+  (*targetm.asm_out.internal_label) (FILE, FUNC_BEGIN_PROLOG_LABEL, LABEL)
+
+#define PROFILE_HOOK(label_no) hppa_profile_hook (label_no)
+void hppa_profile_hook (int label_no);
+
+/* The profile counter if emitted must come before the prologue.  */
+#define PROFILE_BEFORE_PROLOGUE 1
+
+/* We never want final.c to emit profile counters.  When profile
+   counters are required, we have to defer emitting them to the end
+   of the current file.  */
+#define NO_PROFILE_COUNTERS 1
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+   the stack pointer does not matter.  The value is tested only in
+   functions that have frame pointers.
+   No definition is equivalent to always zero.  */
+
+extern int may_call_alloca;
+
+#define EXIT_IGNORE_STACK       \
+ (get_frame_size () != 0        \
+  || cfun->calls_alloca || crtl->outgoing_args_size)
+
+/* Length in units of the trampoline for entering a nested function.  */
+
+#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52)
+
+/* Alignment required by the trampoline.
*/ + +#define TRAMPOLINE_ALIGNMENT BITS_PER_WORD + +/* Minimum length of a cache line. A length of 16 will work on all + PA-RISC processors. All PA 1.1 processors have a cache line of + 32 bytes. Most but not all PA 2.0 processors have a cache line + of 64 bytes. As cache flushes are expensive and we don't support + PA 1.0, we use a minimum length of 32. */ + +#define MIN_CACHELINE_SIZE 32 + + +/* Addressing modes, and classification of registers for them. + + Using autoincrement addressing modes on PA8000 class machines is + not profitable. */ + +#define HAVE_POST_INCREMENT (pa_cpu < PROCESSOR_8000) +#define HAVE_POST_DECREMENT (pa_cpu < PROCESSOR_8000) + +#define HAVE_PRE_DECREMENT (pa_cpu < PROCESSOR_8000) +#define HAVE_PRE_INCREMENT (pa_cpu < PROCESSOR_8000) + +/* Macros to check register numbers against specific register classes. */ + +/* The following macros assume that X is a hard or pseudo reg number. + They give nonzero only if X is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ + +#define REGNO_OK_FOR_INDEX_P(X) \ + ((X) && ((X) < 32 \ + || ((X) == FRAME_POINTER_REGNUM) \ + || ((X) >= FIRST_PSEUDO_REGISTER \ + && reg_renumber \ + && (unsigned) reg_renumber[X] < 32))) +#define REGNO_OK_FOR_BASE_P(X) \ + ((X) && ((X) < 32 \ + || ((X) == FRAME_POINTER_REGNUM) \ + || ((X) >= FIRST_PSEUDO_REGISTER \ + && reg_renumber \ + && (unsigned) reg_renumber[X] < 32))) +#define REGNO_OK_FOR_FP_P(X) \ + (FP_REGNO_P (X) \ + || (X >= FIRST_PSEUDO_REGISTER \ + && reg_renumber \ + && FP_REGNO_P (reg_renumber[X]))) + +/* Now macros that check whether X is a register and also, + strictly, whether it is in a specified class. + + These macros are specific to the HP-PA, and may be used only + in code for printing assembler insns and in conditions for + define_optimization. */ + +/* 1 if X is an fp register. */ + +#define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X))) + +/* Maximum number of registers that can appear in a valid memory address. */ + +#define MAX_REGS_PER_ADDRESS 2 + +/* Non-TLS symbolic references. */ +#define PA_SYMBOL_REF_TLS_P(RTX) \ + (GET_CODE (RTX) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (RTX) != 0) + +/* Recognize any constant value that is a valid address except + for symbolic addresses. We get better CSE by rejecting them + here and allowing hppa_legitimize_address to break them up. We + use most of the constants accepted by CONSTANT_P, except CONST_DOUBLE. */ + +#define CONSTANT_ADDRESS_P(X) \ + ((GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == SYMBOL_REF && !SYMBOL_REF_TLS_MODEL (X)) \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST \ + || GET_CODE (X) == HIGH) \ + && (reload_in_progress || reload_completed || ! symbolic_expression_p (X))) + +/* A C expression that is nonzero if we are using the new HP assembler. */ + +#ifndef NEW_HP_ASSEMBLER +#define NEW_HP_ASSEMBLER 0 +#endif + +/* The macros below define the immediate range for CONST_INTS on + the 64-bit port. Constants in this range can be loaded in three + instructions using a ldil/ldo/depdi sequence. Constants outside + this range are forced to the constant pool prior to reload. 
*/ + +#define MAX_LEGIT_64BIT_CONST_INT ((HOST_WIDE_INT) 32 << 31) +#define MIN_LEGIT_64BIT_CONST_INT ((HOST_WIDE_INT) -32 << 31) +#define LEGITIMATE_64BIT_CONST_INT_P(X) \ + ((X) >= MIN_LEGIT_64BIT_CONST_INT && (X) < MAX_LEGIT_64BIT_CONST_INT) + +/* A C expression that is nonzero if X is a legitimate constant for an + immediate operand. + + We include all constant integers and constant doubles, but not + floating-point, except for floating-point zero. We reject LABEL_REFs + if we're not using gas or the new HP assembler. + + In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS + that need more than three instructions to load prior to reload. This + limit is somewhat arbitrary. It takes three instructions to load a + CONST_INT from memory but two are memory accesses. It may be better + to increase the allowed range for CONST_INTS. We may also be able + to handle CONST_DOUBLES. */ + +#define LEGITIMATE_CONSTANT_P(X) \ + ((GET_MODE_CLASS (GET_MODE (X)) != MODE_FLOAT \ + || (X) == CONST0_RTX (GET_MODE (X))) \ + && (NEW_HP_ASSEMBLER \ + || TARGET_GAS \ + || GET_CODE (X) != LABEL_REF) \ + && !PA_SYMBOL_REF_TLS_P (X) \ + && (!TARGET_64BIT \ + || GET_CODE (X) != CONST_DOUBLE) \ + && (!TARGET_64BIT \ + || HOST_BITS_PER_WIDE_INT <= 32 \ + || GET_CODE (X) != CONST_INT \ + || reload_in_progress \ + || reload_completed \ + || LEGITIMATE_64BIT_CONST_INT_P (INTVAL (X)) \ + || cint_ok_for_move (INTVAL (X))) \ + && !function_label_operand (X, VOIDmode)) + +/* Target flags set on a symbol_ref. */ + +/* Set by ASM_OUTPUT_SYMBOL_REF when a symbol_ref is output. */ +#define SYMBOL_FLAG_REFERENCED (1 << SYMBOL_FLAG_MACH_DEP_SHIFT) +#define SYMBOL_REF_REFERENCED_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & SYMBOL_FLAG_REFERENCED) != 0) + +/* Defines for constraints.md. */ + +/* Return 1 iff OP is a scaled or unscaled index address. */ +#define IS_INDEX_ADDR_P(OP) \ + (GET_CODE (OP) == PLUS \ + && GET_MODE (OP) == Pmode \ + && (GET_CODE (XEXP (OP, 0)) == MULT \ + || GET_CODE (XEXP (OP, 1)) == MULT \ + || (REG_P (XEXP (OP, 0)) \ + && REG_P (XEXP (OP, 1))))) + +/* Return 1 iff OP is a LO_SUM DLT address. */ +#define IS_LO_SUM_DLT_ADDR_P(OP) \ + (GET_CODE (OP) == LO_SUM \ + && GET_MODE (OP) == Pmode \ + && REG_P (XEXP (OP, 0)) \ + && REG_OK_FOR_BASE_P (XEXP (OP, 0)) \ + && GET_CODE (XEXP (OP, 1)) == UNSPEC) + +/* Nonzero if 14-bit offsets can be used for all loads and stores. + This is not possible when generating PA 1.x code as floating point + loads and stores only support 5-bit offsets. Note that we do not + forbid the use of 14-bit offsets in GO_IF_LEGITIMATE_ADDRESS. + Instead, we use pa_secondary_reload() to reload integer mode + REG+D memory addresses used in floating point loads and stores. + + FIXME: the ELF32 linker clobbers the LSB of the FP register number + in PA 2.0 floating-point insns with long displacements. This is + because R_PARISC_DPREL14WR and other relocations like it are not + yet supported by GNU ld. For now, we reject long displacements + on this target. */ + +#define INT14_OK_STRICT \ + (TARGET_SOFT_FLOAT \ + || TARGET_DISABLE_FPREGS \ + || (TARGET_PA_20 && !TARGET_ELF32)) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. 
+
+   Most source files want to accept pseudo regs in the hope that
+   they will get allocated to the class that the insn wants them to be in.
+   Source files for the reload pass need to be strict.
+   After reload, it makes no difference, since pseudo regs have
+   been eliminated by then.  */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as an index
+   or if it is a pseudo reg.  */
+#define REG_OK_FOR_INDEX_P(X) \
+  (REGNO (X) && (REGNO (X) < 32                         \
+   || REGNO (X) == FRAME_POINTER_REGNUM                 \
+   || REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+   or if it is a pseudo reg.  */
+#define REG_OK_FOR_BASE_P(X) \
+  (REGNO (X) && (REGNO (X) < 32                         \
+   || REGNO (X) == FRAME_POINTER_REGNUM                 \
+   || REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as an index.  */
+#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+
+/* Nonzero if X is a hard reg that can be used as a base reg.  */
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#endif
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a
+   valid memory address for an instruction.  The MODE argument is the
+   machine mode for the MEM expression that wants to use this address.
+
+   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
+   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
+   available with floating point loads and stores, and integer loads.
+   We get better code by allowing indexed addresses in the initial
+   RTL generation.
+
+   The acceptance of indexed addresses as legitimate implies that we
+   must provide patterns for doing indexed integer stores, or the move
+   expanders must force the address of an indexed store to a register.
+   We have adopted the latter approach.
+
+   Another function of GO_IF_LEGITIMATE_ADDRESS is to ensure that
+   the base register is a valid pointer for indexed instructions.
+   On targets that have non-equivalent space registers, we have to
+   know at the time of assembler output which register in a REG+REG
+   pair is the base register.  The REG_POINTER flag is sometimes lost
+   in reload and the following passes, so it can't be relied on during
+   code generation.  Thus, we either have to canonicalize the order
+   of the registers in REG+REG indexed addresses, or treat REG+REG
+   addresses separately and provide patterns for both permutations.
+
+   The latter approach requires several hundred additional lines of
+   code in pa.md.  The downside to canonicalizing is that a PLUS
+   in the wrong order can't combine to form a scaled indexed
+   memory operand.  As we won't need to canonicalize the operands if
+   the REG_POINTER lossage can be fixed, it seems better to canonicalize.
+
+   We initially break out scaled indexed addresses in canonical order
+   in emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
+   scaled indexed addresses during RTL generation.  However, fold_rtx
+   has its own opinion on how the operands of a PLUS should be ordered.
+   If one of the operands is equivalent to a constant, it will make
+   that operand the second operand.  As the base register is likely to
+   be equivalent to a SYMBOL_REF, we have made it the second operand.
+
+   GO_IF_LEGITIMATE_ADDRESS accepts REG+REG as legitimate when the
+   operands are in the order INDEX+BASE on targets with non-equivalent
+   space registers, and in any order on targets with equivalent space
+   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
+ + We treat a SYMBOL_REF as legitimate if it is part of the current + function's constant-pool, because such addresses can actually be + output as REG+SMALLINT. */ + +#define VAL_5_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) + 0x10 < 0x20) +#define INT_5_BITS(X) VAL_5_BITS_P (INTVAL (X)) + +#define VAL_U5_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) < 0x20) +#define INT_U5_BITS(X) VAL_U5_BITS_P (INTVAL (X)) + +#define VAL_11_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) + 0x400 < 0x800) +#define INT_11_BITS(X) VAL_11_BITS_P (INTVAL (X)) + +#define VAL_14_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) + 0x2000 < 0x4000) +#define INT_14_BITS(X) VAL_14_BITS_P (INTVAL (X)) + +#if HOST_BITS_PER_WIDE_INT > 32 +#define VAL_32_BITS_P(X) \ + ((unsigned HOST_WIDE_INT)(X) + ((unsigned HOST_WIDE_INT) 1 << 31) \ + < (unsigned HOST_WIDE_INT) 2 << 31) +#else +#define VAL_32_BITS_P(X) 1 +#endif +#define INT_32_BITS(X) VAL_32_BITS_P (INTVAL (X)) + +/* These are the modes that we allow for scaled indexing. */ +#define MODE_OK_FOR_SCALED_INDEXING_P(MODE) \ + ((TARGET_64BIT && (MODE) == DImode) \ + || (MODE) == SImode \ + || (MODE) == HImode \ + || (MODE) == SFmode \ + || (MODE) == DFmode) + +/* These are the modes that we allow for unscaled indexing. */ +#define MODE_OK_FOR_UNSCALED_INDEXING_P(MODE) \ + ((TARGET_64BIT && (MODE) == DImode) \ + || (MODE) == SImode \ + || (MODE) == HImode \ + || (MODE) == QImode \ + || (MODE) == SFmode \ + || (MODE) == DFmode) + +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ +{ \ + if ((REG_P (X) && REG_OK_FOR_BASE_P (X)) \ + || ((GET_CODE (X) == PRE_DEC || GET_CODE (X) == POST_DEC \ + || GET_CODE (X) == PRE_INC || GET_CODE (X) == POST_INC) \ + && REG_P (XEXP (X, 0)) \ + && REG_OK_FOR_BASE_P (XEXP (X, 0)))) \ + goto ADDR; \ + else if (GET_CODE (X) == PLUS) \ + { \ + rtx base = 0, index = 0; \ + if (REG_P (XEXP (X, 1)) \ + && REG_OK_FOR_BASE_P (XEXP (X, 1))) \ + base = XEXP (X, 1), index = XEXP (X, 0); \ + else if (REG_P (XEXP (X, 0)) \ + && REG_OK_FOR_BASE_P (XEXP (X, 0))) \ + base = XEXP (X, 0), index = XEXP (X, 1); \ + if (base \ + && GET_CODE (index) == CONST_INT \ + && ((INT_14_BITS (index) \ + && (((MODE) != DImode \ + && (MODE) != SFmode \ + && (MODE) != DFmode) \ + /* The base register for DImode loads and stores \ + with long displacements must be aligned because \ + the lower three bits in the displacement are \ + assumed to be zero. */ \ + || ((MODE) == DImode \ + && (!TARGET_64BIT \ + || (INTVAL (index) % 8) == 0)) \ + /* Similarly, the base register for SFmode/DFmode \ + loads and stores with long displacements must \ + be aligned. */ \ + || (((MODE) == SFmode || (MODE) == DFmode) \ + && INT14_OK_STRICT \ + && (INTVAL (index) % GET_MODE_SIZE (MODE)) == 0))) \ + || INT_5_BITS (index))) \ + goto ADDR; \ + if (!TARGET_DISABLE_INDEXING \ + /* Only accept the "canonical" INDEX+BASE operand order \ + on targets with non-equivalent space registers. */ \ + && (TARGET_NO_SPACE_REGS \ + ? 
(base && REG_P (index)) \ + : (base == XEXP (X, 1) && REG_P (index) \ + && (reload_completed \ + || (reload_in_progress && HARD_REGISTER_P (base)) \ + || REG_POINTER (base)) \ + && (reload_completed \ + || (reload_in_progress && HARD_REGISTER_P (index)) \ + || !REG_POINTER (index)))) \ + && MODE_OK_FOR_UNSCALED_INDEXING_P (MODE) \ + && REG_OK_FOR_INDEX_P (index) \ + && borx_reg_operand (base, Pmode) \ + && borx_reg_operand (index, Pmode)) \ + goto ADDR; \ + if (!TARGET_DISABLE_INDEXING \ + && base \ + && GET_CODE (index) == MULT \ + && MODE_OK_FOR_SCALED_INDEXING_P (MODE) \ + && REG_P (XEXP (index, 0)) \ + && GET_MODE (XEXP (index, 0)) == Pmode \ + && REG_OK_FOR_INDEX_P (XEXP (index, 0)) \ + && GET_CODE (XEXP (index, 1)) == CONST_INT \ + && INTVAL (XEXP (index, 1)) \ + == (HOST_WIDE_INT) GET_MODE_SIZE (MODE) \ + && borx_reg_operand (base, Pmode)) \ + goto ADDR; \ + } \ + else if (GET_CODE (X) == LO_SUM \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (X, 0)) \ + && CONSTANT_P (XEXP (X, 1)) \ + && (TARGET_SOFT_FLOAT \ + /* We can allow symbolic LO_SUM addresses for PA2.0. */ \ + || (TARGET_PA_20 \ + && !TARGET_ELF32 \ + && GET_CODE (XEXP (X, 1)) != CONST_INT) \ + || ((MODE) != SFmode \ + && (MODE) != DFmode))) \ + goto ADDR; \ + else if (GET_CODE (X) == LO_SUM \ + && GET_CODE (XEXP (X, 0)) == SUBREG \ + && GET_CODE (SUBREG_REG (XEXP (X, 0))) == REG \ + && REG_OK_FOR_BASE_P (SUBREG_REG (XEXP (X, 0))) \ + && CONSTANT_P (XEXP (X, 1)) \ + && (TARGET_SOFT_FLOAT \ + /* We can allow symbolic LO_SUM addresses for PA2.0. */ \ + || (TARGET_PA_20 \ + && !TARGET_ELF32 \ + && GET_CODE (XEXP (X, 1)) != CONST_INT) \ + || ((MODE) != SFmode \ + && (MODE) != DFmode))) \ + goto ADDR; \ + else if (GET_CODE (X) == CONST_INT && INT_5_BITS (X)) \ + goto ADDR; \ + /* Needed for -fPIC */ \ + else if (GET_CODE (X) == LO_SUM \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (X, 0)) \ + && GET_CODE (XEXP (X, 1)) == UNSPEC \ + && (TARGET_SOFT_FLOAT \ + || (TARGET_PA_20 && !TARGET_ELF32) \ + || ((MODE) != SFmode \ + && (MODE) != DFmode))) \ + goto ADDR; \ +} + +/* Look for machine dependent ways to make the invalid address AD a + valid address. + + For the PA, transform: + + memory(X + <large int>) + + into: + + if (<large int> & mask) >= 16 + Y = (<large int> & ~mask) + mask + 1 Round up. + else + Y = (<large int> & ~mask) Round down. + Z = X + Y + memory (Z + (<large int> - Y)); + + This makes reload inheritance and reload_cse work better since Z + can be reused. + + There may be more opportunities to improve code with this hook. */ +#define LEGITIMIZE_RELOAD_ADDRESS(AD, MODE, OPNUM, TYPE, IND, WIN) \ +do { \ + long offset, newoffset, mask; \ + rtx new_rtx, temp = NULL_RTX; \ + \ + mask = (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff); \ + \ + if (optimize && GET_CODE (AD) == PLUS) \ + temp = simplify_binary_operation (PLUS, Pmode, \ + XEXP (AD, 0), XEXP (AD, 1)); \ + \ + new_rtx = temp ? temp : AD; \ + \ + if (optimize \ + && GET_CODE (new_rtx) == PLUS \ + && GET_CODE (XEXP (new_rtx, 0)) == REG \ + && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT) \ + { \ + offset = INTVAL (XEXP ((new_rtx), 1)); \ + \ + /* Choose rounding direction. Round up if we are >= halfway. */ \ + if ((offset & mask) >= ((mask + 1) / 2)) \ + newoffset = (offset & ~mask) + mask + 1; \ + else \ + newoffset = offset & ~mask; \ + \ + /* Ensure that long displacements are aligned. 
*/ \
+      if (mask == 0x3fff                                               \
+         && (GET_MODE_CLASS (MODE) == MODE_FLOAT                       \
+             || (TARGET_64BIT && (MODE) == DImode)))                   \
+       newoffset &= ~(GET_MODE_SIZE (MODE) - 1);                       \
+                                                                       \
+      if (newoffset != 0 && VAL_14_BITS_P (newoffset))                 \
+       {                                                               \
+         temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),                \
+                              GEN_INT (newoffset));                    \
+         AD = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));\
+         push_reload (XEXP (AD, 0), 0, &XEXP (AD, 0), 0,               \
+                      BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,           \
+                      (OPNUM), (TYPE));                                \
+         goto WIN;                                                     \
+       }                                                               \
+    }                                                                  \
+} while (0)
+
+
+
+#define TARGET_ASM_SELECT_SECTION pa_select_section
+
+/* Return a nonzero value if DECL has a section attribute.  */
+#define IN_NAMED_SECTION_P(DECL) \
+  ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
+   && DECL_SECTION_NAME (DECL) != NULL_TREE)
+
+/* Define this macro if references to a symbol must be treated
+   differently depending on something about the variable or
+   function named by the symbol (such as what section it is in).
+
+   The macro definition, if any, is executed immediately after the
+   rtl for DECL or other node is created.
+   The value of the rtl will be a `mem' whose address is a
+   `symbol_ref'.
+
+   The usual thing for this macro to do is to set a flag in the
+   `symbol_ref' (such as `SYMBOL_REF_FLAG') or to store a modified
+   name string in the `symbol_ref' (if one bit is not enough
+   information).
+
+   On the HP-PA we use this to indicate if a symbol is in text or
+   data space.  Also, function labels need special treatment.  */
+
+#define TEXT_SPACE_P(DECL)\
+  (TREE_CODE (DECL) == FUNCTION_DECL \
+   || (TREE_CODE (DECL) == VAR_DECL \
+       && TREE_READONLY (DECL) && ! TREE_SIDE_EFFECTS (DECL) \
+       && (! DECL_INITIAL (DECL) || ! reloc_needed (DECL_INITIAL (DECL))) \
+       && !flag_pic) \
+   || CONSTANT_CLASS_P (DECL))
+
+#define FUNCTION_NAME_P(NAME)  (*(NAME) == '@')
+
+/* Specify the machine mode that this machine uses for the index in the
+   tablejump instruction.  For small tables, an element consists of an
+   ia-relative branch and its delay slot.  When -mbig-switch is specified,
+   we use a 32-bit absolute address for non-pic code, and a 32-bit offset
+   for both 32 and 64-bit pic code.  */
+#define CASE_VECTOR_MODE (TARGET_BIG_SWITCH ? SImode : DImode)
+
+/* Jump tables must be 32-bit aligned, no matter the size of the element.  */
+#define ADDR_VEC_ALIGN(ADDR_VEC) 2
+
+/* Define this as 1 if `char' should by default be signed; else as 0.  */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Max number of bytes we can move from memory to memory
+   in one reasonably fast instruction.  */
+#define MOVE_MAX 8
+
+/* Higher than the default as we prefer to use simple move insns
+   (better scheduling and delay slot filling) and because our
+   built-in block move is really a 2X unrolled loop.
+
+   Believe it or not, this has to be big enough to allow for copying all
+   arguments passed in registers to avoid infinite recursion during argument
+   setup for a function call.  Why?  Consider how we copy the stack slots
+   reserved for parameters when they may be trashed by a call.  */
+#define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4)
+
+/* Define if operations between registers always perform the operation
+   on the full register even if a narrower mode is specified.  */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+   will either zero-extend or sign-extend.  The value of this macro should
+   be the code that says which one of the two operations is implicitly
+   done, UNKNOWN if none.
*/ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Nonzero if access to memory by bytes is slow and undesirable. */ +#define SLOW_BYTE_ACCESS 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode word_mode + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. For floating-point, CCFPmode + should be used. CC_NOOVmode should be used when the first operand is a + PLUS, MINUS, or NEG. CCmode should be used when no special processing is + needed. */ +#define SELECT_CC_MODE(OP,X,Y) \ + (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT ? CCFPmode : CCmode) \ + +/* A function address in a call instruction + is a byte address (for indexing purposes) + so give the MEM rtx a byte's mode. */ +#define FUNCTION_MODE SImode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. */ +#define NO_FUNCTION_CSE + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Adjust the cost of branches. */ +#define BRANCH_COST(speed_p, predictable_p) (pa_cpu == PROCESSOR_8000 ? 2 : 1) + +/* Handling the special cases is going to get too complicated for a macro, + just call `pa_adjust_insn_length' to do the real work. */ +#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ + ((LENGTH) = pa_adjust_insn_length ((INSN), (LENGTH))) + +/* Millicode insns are actually function calls with some special + constraints on arguments and register usage. + + Millicode calls always expect their arguments in the integer argument + registers, and always return their result in %r29 (ret1). They + are expected to clobber their arguments, %r1, %r29, and the return + pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else. + + This macro tells reorg that the references to arguments and + millicode calls do not appear to happen until after the millicode call. + This allows reorg to put insns which set the argument registers into the + delay slot of the millicode call -- thus they act more like traditional + CALL_INSNs. + + Note we cannot consider side effects of the insn to be delayed because + the branch and link insn will clobber the return pointer. If we happened + to use the return pointer in the delay slot of the call, then we lose. + + get_attr_type will try to recognize the given insn, so make sure to + filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns + in particular. */ +#define INSN_REFERENCES_ARE_DELAYED(X) (insn_refs_are_delayed (X)) + + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at + the end of the line. */ + +#define ASM_COMMENT_START ";" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ + +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. 
*/
+
+#define ASM_APP_OFF ""
+
+/* This is how to output the definition of a user-level label named NAME,
+   such as the label on a static function or variable NAME.  */
+
+#define ASM_OUTPUT_LABEL(FILE,NAME) \
+  do {                                          \
+    assemble_name ((FILE), (NAME));             \
+    if (TARGET_GAS)                             \
+      fputs (":\n", (FILE));                    \
+    else                                        \
+      fputc ('\n', (FILE));                     \
+  } while (0)
+
+/* This is how to output a reference to a user-level label named NAME.
+   `assemble_name' uses this.  */
+
+#define ASM_OUTPUT_LABELREF(FILE,NAME)  \
+  do {                                  \
+    const char *xname = (NAME);         \
+    if (FUNCTION_NAME_P (NAME))         \
+      xname += 1;                       \
+    if (xname[0] == '*')                \
+      xname += 1;                       \
+    else                                \
+      fputs (user_label_prefix, FILE);  \
+    fputs (xname, FILE);                \
+  } while (0)
+
+/* This is how we output the symbol_ref X.  */
+
+#define ASM_OUTPUT_SYMBOL_REF(FILE,X) \
+  do {                                 \
+    SYMBOL_REF_FLAGS (X) |= SYMBOL_FLAG_REFERENCED; \
+    assemble_name (FILE, XSTR (X, 0)); \
+  } while (0)
+
+/* This is how to store into the string LABEL
+   the symbol_ref name of an internal numbered label where
+   PREFIX is the class of label and NUM is the number within the class.
+   This is suitable for output with `assemble_name'.  */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM)   \
+  sprintf (LABEL, "*%c$%s%04ld", (PREFIX)[0], (PREFIX) + 1, (long)(NUM))
+
+/* Output the definition of a compiler-generated label named NAME.  */
+
+#define ASM_OUTPUT_INTERNAL_LABEL(FILE,NAME) \
+  do {                                          \
+    assemble_name_raw ((FILE), (NAME));         \
+    if (TARGET_GAS)                             \
+      fputs (":\n", (FILE));                    \
+    else                                        \
+      fputc ('\n', (FILE));                     \
+  } while (0)
+
+#define TARGET_ASM_GLOBALIZE_LABEL pa_globalize_label
+
+#define ASM_OUTPUT_ASCII(FILE, P, SIZE)  \
+  output_ascii ((FILE), (P), (SIZE))
+
+/* Jump tables are always placed in the text section.  Technically, it
+   is possible to put them in the readonly data section when -mbig-switch
+   is specified.  This has the benefit of getting the table out of .text
+   and reducing branch lengths as a result.  The downside is that an
+   additional insn (addil) is needed to access the table when generating
+   PIC code.  The address difference table also has to use 32-bit
+   pc-relative relocations.  Currently, GAS does not support these
+   relocations, although it is easily modified to do this operation.
+   The table entries need to look like "$L1+(.+8-$L0)-$PIC_pcrel$0"
+   when using ELF GAS.  A simple difference can be used when using
+   SOM GAS or the HP assembler.  The final downside is that GDB complains
+   about the nesting of the label for the table when debugging.  */
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* This is how to output an element of a case-vector that is absolute.  */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE)  \
+  if (TARGET_BIG_SWITCH) \
+    fprintf (FILE, "\t.word L$%04d\n", VALUE); \
+  else \
+    fprintf (FILE, "\tb L$%04d\n\tnop\n", VALUE)
+
+/* This is how to output an element of a case-vector that is relative.
+   Since we always place jump tables in the text section, the difference
+   is absolute and requires no relocation.  */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL)  \
+  if (TARGET_BIG_SWITCH) \
+    fprintf (FILE, "\t.word L$%04d-L$%04d\n", VALUE, REL); \
+  else \
+    fprintf (FILE, "\tb L$%04d\n\tnop\n", VALUE)
+
+/* This is how to output an assembler line that says to advance the
+   location counter to a multiple of 2**LOG bytes.
*/ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + fprintf (FILE, "\t.align %d\n", (1<<(LOG))) + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.blockz "HOST_WIDE_INT_PRINT_UNSIGNED"\n", \ + (unsigned HOST_WIDE_INT)(SIZE)) + +/* This says how to output an assembler line to define an uninitialized + global variable with size SIZE (in bytes) and alignment ALIGN (in bits). + This macro exists to properly support languages like C++ which do not + have common data. */ + +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + pa_asm_output_aligned_bss (FILE, NAME, SIZE, ALIGN) + +/* This says how to output an assembler line to define a global common symbol + with size SIZE (in bytes) and alignment ALIGN (in bits). */ + +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + pa_asm_output_aligned_common (FILE, NAME, SIZE, ALIGN) + +/* This says how to output an assembler line to define a local common symbol + with size SIZE (in bytes) and alignment ALIGN (in bits). This macro + controls how the assembler definitions of uninitialized static variables + are output. */ + +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + pa_asm_output_aligned_local (FILE, NAME, SIZE, ALIGN) + +/* All HP assemblers use "!" to separate logical lines. */ +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '!') + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. + + On the HP-PA, the CODE can be `r', meaning this is a register-only operand + and an immediate zero should be represented as `r0'. + + Several % codes are defined: + O an operation + C compare conditions + N extract conditions + M modifier to handle preincrement addressing for memory refs. + F modifier to handle preincrement addressing for fp memory refs */ + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) + + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ +{ rtx addr = ADDR; \ + switch (GET_CODE (addr)) \ + { \ + case REG: \ + fprintf (FILE, "0(%s)", reg_names [REGNO (addr)]); \ + break; \ + case PLUS: \ + gcc_assert (GET_CODE (XEXP (addr, 1)) == CONST_INT); \ + fprintf (FILE, "%d(%s)", (int)INTVAL (XEXP (addr, 1)), \ + reg_names [REGNO (XEXP (addr, 0))]); \ + break; \ + case LO_SUM: \ + if (!symbolic_operand (XEXP (addr, 1), VOIDmode)) \ + fputs ("R'", FILE); \ + else if (flag_pic == 0) \ + fputs ("RR'", FILE); \ + else \ + fputs ("RT'", FILE); \ + output_global_address (FILE, XEXP (addr, 1), 0); \ + fputs ("(", FILE); \ + output_operand (XEXP (addr, 0), 0); \ + fputs (")", FILE); \ + break; \ + case CONST_INT: \ + fprintf (FILE, HOST_WIDE_INT_PRINT_DEC "(%%r0)", INTVAL (addr)); \ + break; \ + default: \ + output_addr_const (FILE, addr); \ + }} + + +/* Find the return address associated with the frame given by + FRAMEADDR. */ +#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \ + (return_addr_rtx (COUNT, FRAMEADDR)) + +/* Used to mask out junk bits from the return address, such as + processor state, interrupt status, condition codes and the like. */ +#define MASK_RETURN_ADDR \ + /* The privilege level is in the two low order bits, mask em out \ + of the return address. */ \ + (GEN_INT (-4)) + +/* The number of Pmode words for the setjmp buffer. */ +#define JMP_BUF_SIZE 50 + +/* We need a libcall to canonicalize function pointers on TARGET_ELF32. 
*/ +#define CANONICALIZE_FUNCPTR_FOR_COMPARE_LIBCALL \ + "__canonicalize_funcptr_for_compare" + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif + +/* The maximum offset in bytes for a PA 1.X pc-relative call to the + head of the preceding stub table. The selected offsets have been + chosen so that approximately one call stub is allocated for every + 86.7 instructions. A long branch stub is two instructions when + not generating PIC code. For HP-UX and ELF targets, PIC stubs are + seven and four instructions, respectively. */ +#define MAX_PCREL17F_OFFSET \ + (flag_pic ? (TARGET_HPUX ? 198164 : 221312) : 240000) diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md new file mode 100644 index 000000000..7a032c8c0 --- /dev/null +++ b/gcc/config/pa/pa.md @@ -0,0 +1,9543 @@ +;;- Machine description for HP PA-RISC architecture for GCC compiler +;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +;; 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 +;; Free Software Foundation, Inc. +;; Contributed by the Center for Software Science at the University +;; of Utah. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; This gcc Version 2 machine description is inspired by sparc.md and +;; mips.md. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; Uses of UNSPEC in this file: + +(define_constants + [(UNSPEC_CFFC 0) ; canonicalize_funcptr_for_compare + (UNSPEC_GOTO 1) ; indirect_goto + (UNSPEC_DLTIND14R 2) ; + (UNSPEC_TP 3) + (UNSPEC_TLSGD 4) + (UNSPEC_TLSLDM 5) + (UNSPEC_TLSLDO 6) + (UNSPEC_TLSLDBASE 7) + (UNSPEC_TLSIE 8) + (UNSPEC_TLSLE 9) + (UNSPEC_TLSGD_PIC 10) + (UNSPEC_TLSLDM_PIC 11) + (UNSPEC_TLSIE_PIC 12) + ]) + +;; UNSPEC_VOLATILE: + +(define_constants + [(UNSPECV_BLOCKAGE 0) ; blockage + (UNSPECV_DCACHE 1) ; dcacheflush + (UNSPECV_ICACHE 2) ; icacheflush + (UNSPECV_OPC 3) ; outline_prologue_call + (UNSPECV_OEC 4) ; outline_epilogue_call + (UNSPECV_LONGJMP 5) ; builtin_longjmp + ]) + +;; Maximum pc-relative branch offsets. + +;; These numbers are a bit smaller than the maximum allowable offsets +;; so that a few instructions may be inserted before the actual branch. + +(define_constants + [(MAX_12BIT_OFFSET 8184) ; 12-bit branch + (MAX_17BIT_OFFSET 262100) ; 17-bit branch + ]) + +;; Mode and code iterators + +;; This mode iterator allows :P to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. +(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + +;; This attribute defines the condition prefix for word and double word +;; add, compare, subtract and logical instructions. +(define_mode_attr dwc [(SI "") (DI "*")]) + +;; Insn type. Used to default other attribute values. 
+ +;; type "unary" insns have one input operand (1) and one output operand (0) +;; type "binary" insns have two input operands (1,2) and one output (0) + +(define_attr "type" + "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload" + (const_string "binary")) + +(define_attr "pa_combine_type" + "fmpy,faddsub,uncond_branch,addmove,none" + (const_string "none")) + +;; Processor type (for scheduling, not code generation) -- this attribute +;; must exactly match the processor_type enumeration in pa.h. +;; +;; FIXME: Add 800 scheduling for completeness? + +(define_attr "cpu" "700,7100,7100LC,7200,7300,8000" (const (symbol_ref "pa_cpu_attr"))) + +;; Length (in # of bytes). +(define_attr "length" "" + (cond [(eq_attr "type" "load,fpload") + (if_then_else (match_operand 1 "symbolic_memory_operand" "") + (const_int 8) (const_int 4)) + + (eq_attr "type" "store,fpstore") + (if_then_else (match_operand 0 "symbolic_memory_operand" "") + (const_int 8) (const_int 4)) + + (eq_attr "type" "binary,shift,nullshift") + (if_then_else (match_operand 2 "arith_operand" "") + (const_int 4) (const_int 12)) + + (eq_attr "type" "move,unary,shift,nullshift") + (if_then_else (match_operand 1 "arith_operand" "") + (const_int 4) (const_int 8))] + + (const_int 4))) + +(define_asm_attributes + [(set_attr "length" "4") + (set_attr "type" "multi")]) + +;; Attributes for instruction and branch scheduling + +;; For conditional branches. +(define_attr "in_branch_delay" "false,true" + (if_then_else (and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (eq_attr "length" "4") + (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)") + (const_int 0))) + (const_string "true") + (const_string "false"))) + +;; Disallow instructions which use the FPU since they will tie up the FPU +;; even if the instruction is nullified. +(define_attr "in_nullified_branch_delay" "false,true" + (if_then_else (and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch") + (eq_attr "length" "4") + (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)") + (const_int 0))) + (const_string "true") + (const_string "false"))) + +;; For calls and millicode calls. Allow unconditional branches in the +;; delay slot. +(define_attr "in_call_delay" "false,true" + (cond [(and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (eq_attr "length" "4") + (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)") + (const_int 0))) + (const_string "true") + (eq_attr "type" "uncond_branch") + (if_then_else (ne (symbol_ref "TARGET_JUMP_IN_DELAY") + (const_int 0)) + (const_string "true") + (const_string "false"))] + (const_string "false"))) + + +;; Call delay slot description. +(define_delay (eq_attr "type" "call") + [(eq_attr "in_call_delay" "true") (nil) (nil)]) + +;; Sibcall delay slot description. +(define_delay (eq_attr "type" "sibcall") + [(eq_attr "in_call_delay" "true") (nil) (nil)]) + +;; Millicode call delay slot description. +(define_delay (eq_attr "type" "milli") + [(eq_attr "in_call_delay" "true") (nil) (nil)]) + +;; Return and other similar instructions. 
+(define_delay (eq_attr "type" "btable_branch,branch,parallel_branch") + [(eq_attr "in_branch_delay" "true") (nil) (nil)]) + +;; Floating point conditional branch delay slot description. +(define_delay (eq_attr "type" "fbranch") + [(eq_attr "in_branch_delay" "true") + (eq_attr "in_nullified_branch_delay" "true") + (nil)]) + +;; Integer conditional branch delay slot description. +;; Nullification of conditional branches on the PA is dependent on the +;; direction of the branch. Forward branches nullify true and +;; backward branches nullify false. If the direction is unknown +;; then nullification is not allowed. +(define_delay (eq_attr "type" "cbranch") + [(eq_attr "in_branch_delay" "true") + (and (eq_attr "in_nullified_branch_delay" "true") + (attr_flag "forward")) + (and (eq_attr "in_nullified_branch_delay" "true") + (attr_flag "backward"))]) + +(define_delay (and (eq_attr "type" "uncond_branch") + (eq (symbol_ref "following_call (insn)") + (const_int 0))) + [(eq_attr "in_branch_delay" "true") (nil) (nil)]) + +;; Memory. Disregarding Cache misses, the Mustang memory times are: +;; load: 2, fpload: 3 +;; store, fpstore: 3, no D-cache operations should be scheduled. + +;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT. +;; Timings: +;; Instruction Time Unit Minimum Distance (unit contention) +;; fcpy 3 ALU 2 +;; fabs 3 ALU 2 +;; fadd 3 ALU 2 +;; fsub 3 ALU 2 +;; fcmp 3 ALU 2 +;; fcnv 3 ALU 2 +;; fmpyadd 3 ALU,MPY 2 +;; fmpysub 3 ALU,MPY 2 +;; fmpycfxt 3 ALU,MPY 2 +;; fmpy 3 MPY 2 +;; fmpyi 3 MPY 2 +;; fdiv,sgl 10 MPY 10 +;; fdiv,dbl 12 MPY 12 +;; fsqrt,sgl 14 MPY 14 +;; fsqrt,dbl 18 MPY 18 +;; +;; We don't model fmpyadd/fmpysub properly as those instructions +;; keep both the FP ALU and MPY units busy. Given that these +;; processors are obsolete, I'm not going to spend the time to +;; model those instructions correctly. 
+ +(define_automaton "pa700") +(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700") + +(define_insn_reservation "W0" 4 + (and (eq_attr "type" "fpcc") + (eq_attr "cpu" "700")) + "fpalu_700*2") + +(define_insn_reservation "W1" 3 + (and (eq_attr "type" "fpalu") + (eq_attr "cpu" "700")) + "fpalu_700*2") + +(define_insn_reservation "W2" 3 + (and (eq_attr "type" "fpmulsgl,fpmuldbl") + (eq_attr "cpu" "700")) + "fpmpy_700*2") + +(define_insn_reservation "W3" 10 + (and (eq_attr "type" "fpdivsgl") + (eq_attr "cpu" "700")) + "fpmpy_700*10") + +(define_insn_reservation "W4" 12 + (and (eq_attr "type" "fpdivdbl") + (eq_attr "cpu" "700")) + "fpmpy_700*12") + +(define_insn_reservation "W5" 14 + (and (eq_attr "type" "fpsqrtsgl") + (eq_attr "cpu" "700")) + "fpmpy_700*14") + +(define_insn_reservation "W6" 18 + (and (eq_attr "type" "fpsqrtdbl") + (eq_attr "cpu" "700")) + "fpmpy_700*18") + +(define_insn_reservation "W7" 2 + (and (eq_attr "type" "load") + (eq_attr "cpu" "700")) + "mem_700") + +(define_insn_reservation "W8" 2 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "700")) + "mem_700") + +(define_insn_reservation "W9" 3 + (and (eq_attr "type" "store") + (eq_attr "cpu" "700")) + "mem_700*3") + +(define_insn_reservation "W10" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "700")) + "mem_700*3") + +(define_insn_reservation "W11" 5 + (and (eq_attr "type" "fpstore_load") + (eq_attr "cpu" "700")) + "mem_700*5") + +(define_insn_reservation "W12" 6 + (and (eq_attr "type" "store_fpload") + (eq_attr "cpu" "700")) + "mem_700*6") + +(define_insn_reservation "W13" 1 + (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore,fpstore_load,store_fpload") + (eq_attr "cpu" "700")) + "dummy_700") + +;; We have a bypass for all computations in the FP unit which feed an +;; FP store as long as the sizes are the same. +(define_bypass 2 "W1,W2" "W10,W11" "hppa_fpstore_bypass_p") +(define_bypass 9 "W3" "W10,W11" "hppa_fpstore_bypass_p") +(define_bypass 11 "W4" "W10,W11" "hppa_fpstore_bypass_p") +(define_bypass 13 "W5" "W10,W11" "hppa_fpstore_bypass_p") +(define_bypass 17 "W6" "W10,W11" "hppa_fpstore_bypass_p") + +;; We have an "anti-bypass" for FP loads which feed an FP store. +(define_bypass 4 "W8,W12" "W10,W11" "hppa_fpstore_bypass_p") + +;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue +;; floating point computations with non-floating point computations (fp loads +;; and stores are not fp computations). +;; +;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also +;; take two cycles, during which no Dcache operations should be scheduled. +;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC +;; all have the same memory characteristics if one disregards cache misses. +;; +;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV. +;; There's no value in modeling the ALU and MUL separately though +;; since there can never be a functional unit conflict given the +;; latency and issue rates for those units. 
+;; +;; Timings: +;; Instruction Time Unit Minimum Distance (unit contention) +;; fcpy 2 ALU 1 +;; fabs 2 ALU 1 +;; fadd 2 ALU 1 +;; fsub 2 ALU 1 +;; fcmp 2 ALU 1 +;; fcnv 2 ALU 1 +;; fmpyadd 2 ALU,MPY 1 +;; fmpysub 2 ALU,MPY 1 +;; fmpycfxt 2 ALU,MPY 1 +;; fmpy 2 MPY 1 +;; fmpyi 2 MPY 1 +;; fdiv,sgl 8 DIV 8 +;; fdiv,dbl 15 DIV 15 +;; fsqrt,sgl 8 DIV 8 +;; fsqrt,dbl 15 DIV 15 + +(define_automaton "pa7100") +(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100") + +(define_insn_reservation "X0" 2 + (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl") + (eq_attr "cpu" "7100")) + "f_7100,fpmac_7100") + +(define_insn_reservation "X1" 8 + (and (eq_attr "type" "fpdivsgl,fpsqrtsgl") + (eq_attr "cpu" "7100")) + "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7") + +(define_insn_reservation "X2" 15 + (and (eq_attr "type" "fpdivdbl,fpsqrtdbl") + (eq_attr "cpu" "7100")) + "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14") + +(define_insn_reservation "X3" 2 + (and (eq_attr "type" "load") + (eq_attr "cpu" "7100")) + "i_7100+mem_7100") + +(define_insn_reservation "X4" 2 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "7100")) + "i_7100+mem_7100") + +(define_insn_reservation "X5" 2 + (and (eq_attr "type" "store") + (eq_attr "cpu" "7100")) + "i_7100+mem_7100,mem_7100") + +(define_insn_reservation "X6" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "7100")) + "i_7100+mem_7100,mem_7100") + +(define_insn_reservation "X7" 4 + (and (eq_attr "type" "fpstore_load") + (eq_attr "cpu" "7100")) + "i_7100+mem_7100,mem_7100*3") + +(define_insn_reservation "X8" 4 + (and (eq_attr "type" "store_fpload") + (eq_attr "cpu" "7100")) + "i_7100+mem_7100,mem_7100*3") + +(define_insn_reservation "X9" 1 + (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,fpstore_load,store_fpload") + (eq_attr "cpu" "7100")) + "i_7100") + +;; We have a bypass for all computations in the FP unit which feed an +;; FP store as long as the sizes are the same. +(define_bypass 1 "X0" "X6,X7" "hppa_fpstore_bypass_p") +(define_bypass 7 "X1" "X6,X7" "hppa_fpstore_bypass_p") +(define_bypass 14 "X2" "X6,X7" "hppa_fpstore_bypass_p") + +;; We have an "anti-bypass" for FP loads which feed an FP store. +(define_bypass 3 "X4,X8" "X6,X7" "hppa_fpstore_bypass_p") + +;; The 7100LC has three floating-point units: ALU, MUL, and DIV. +;; There's no value in modeling the ALU and MUL separately though +;; since there can never be a functional unit conflict that +;; can be avoided given the latency, issue rates and mandatory +;; one cycle cpu-wide lock for a double precision fp multiply. +;; +;; Timings: +;; Instruction Time Unit Minimum Distance (unit contention) +;; fcpy 2 ALU 1 +;; fabs 2 ALU 1 +;; fadd 2 ALU 1 +;; fsub 2 ALU 1 +;; fcmp 2 ALU 1 +;; fcnv 2 ALU 1 +;; fmpyadd,sgl 2 ALU,MPY 1 +;; fmpyadd,dbl 3 ALU,MPY 2 +;; fmpysub,sgl 2 ALU,MPY 1 +;; fmpysub,dbl 3 ALU,MPY 2 +;; fmpycfxt,sgl 2 ALU,MPY 1 +;; fmpycfxt,dbl 3 ALU,MPY 2 +;; fmpy,sgl 2 MPY 1 +;; fmpy,dbl 3 MPY 2 +;; fmpyi 3 MPY 2 +;; fdiv,sgl 8 DIV 8 +;; fdiv,dbl 15 DIV 15 +;; fsqrt,sgl 8 DIV 8 +;; fsqrt,dbl 15 DIV 15 +;; +;; The PA7200 is just like the PA7100LC except that there is +;; no store-store penalty. +;; +;; The PA7300 is just like the PA7200 except that there is +;; no store-load penalty. +;; +;; Note there are some aspects of the 7100LC we are not modeling +;; at the moment. I'll be reviewing the 7100LC scheduling info +;; shortly and updating this description. 
+;; +;; load-load pairs +;; store-store pairs +;; other issue modeling + +(define_automaton "pa7100lc") +(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc") +(define_cpu_unit "fpmac_7100lc" "pa7100lc") +(define_cpu_unit "mem_7100lc" "pa7100lc") + +;; Double precision multiplies lock the entire CPU for one +;; cycle. There is no way to avoid this lock and trying to +;; schedule around the lock is pointless and thus there is no +;; value in trying to model this lock. +;; +;; Not modeling the lock allows us to treat fp multiplies just +;; like any other FP alu instruction. It allows for a smaller +;; DFA and may reduce register pressure. +(define_insn_reservation "Y0" 2 + (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl") + (eq_attr "cpu" "7100LC,7200,7300")) + "f_7100lc,fpmac_7100lc") + +;; fp division and sqrt instructions lock the entire CPU for +;; 7 cycles (single precision) or 14 cycles (double precision). +;; There is no way to avoid this lock and trying to schedule +;; around the lock is pointless and thus there is no value in +;; trying to model this lock. Not modeling the lock allows +;; for a smaller DFA and may reduce register pressure. +(define_insn_reservation "Y1" 1 + (and (eq_attr "type" "fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl") + (eq_attr "cpu" "7100LC,7200,7300")) + "f_7100lc") + +(define_insn_reservation "Y2" 2 + (and (eq_attr "type" "load") + (eq_attr "cpu" "7100LC,7200,7300")) + "i1_7100lc+mem_7100lc") + +(define_insn_reservation "Y3" 2 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "7100LC,7200,7300")) + "i1_7100lc+mem_7100lc") + +(define_insn_reservation "Y4" 2 + (and (eq_attr "type" "store") + (eq_attr "cpu" "7100LC")) + "i1_7100lc+mem_7100lc,mem_7100lc") + +(define_insn_reservation "Y5" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "7100LC")) + "i1_7100lc+mem_7100lc,mem_7100lc") + +(define_insn_reservation "Y6" 4 + (and (eq_attr "type" "fpstore_load") + (eq_attr "cpu" "7100LC")) + "i1_7100lc+mem_7100lc,mem_7100lc*3") + +(define_insn_reservation "Y7" 4 + (and (eq_attr "type" "store_fpload") + (eq_attr "cpu" "7100LC")) + "i1_7100lc+mem_7100lc,mem_7100lc*3") + +(define_insn_reservation "Y8" 1 + (and (eq_attr "type" "shift,nullshift") + (eq_attr "cpu" "7100LC,7200,7300")) + "i1_7100lc") + +(define_insn_reservation "Y9" 1 + (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift") + (eq_attr "cpu" "7100LC,7200,7300")) + "(i0_7100lc|i1_7100lc)") + +;; The 7200 has a store-load penalty +(define_insn_reservation "Y10" 2 + (and (eq_attr "type" "store") + (eq_attr "cpu" "7200")) + "i1_7100lc,mem_7100lc") + +(define_insn_reservation "Y11" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "7200")) + "i1_7100lc,mem_7100lc") + +(define_insn_reservation "Y12" 4 + (and (eq_attr "type" "fpstore_load") + (eq_attr "cpu" "7200")) + "i1_7100lc,mem_7100lc,i1_7100lc+mem_7100lc") + +(define_insn_reservation "Y13" 4 + (and (eq_attr "type" "store_fpload") + (eq_attr "cpu" "7200")) + "i1_7100lc,mem_7100lc,i1_7100lc+mem_7100lc") + +;; The 7300 has no penalty for store-store or store-load +(define_insn_reservation "Y14" 2 + (and (eq_attr "type" "store") + (eq_attr "cpu" "7300")) + "i1_7100lc") + +(define_insn_reservation "Y15" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "7300")) + "i1_7100lc") + +(define_insn_reservation "Y16" 4 + (and (eq_attr "type" "fpstore_load") + (eq_attr "cpu" "7300")) + "i1_7100lc,i1_7100lc+mem_7100lc") + +(define_insn_reservation "Y17" 4 + (and (eq_attr "type" 
"store_fpload") + (eq_attr "cpu" "7300")) + "i1_7100lc,i1_7100lc+mem_7100lc") + +;; We have an "anti-bypass" for FP loads which feed an FP store. +(define_bypass 3 "Y3,Y7,Y13,Y17" "Y5,Y6,Y11,Y12,Y15,Y16" "hppa_fpstore_bypass_p") + +;; Scheduling for the PA8000 is somewhat different than scheduling for a +;; traditional architecture. +;; +;; The PA8000 has a large (56) entry reorder buffer that is split between +;; memory and non-memory operations. +;; +;; The PA8000 can issue two memory and two non-memory operations per cycle to +;; the function units, with the exception of branches and multi-output +;; instructions. The PA8000 can retire two non-memory operations per cycle +;; and two memory operations per cycle, only one of which may be a store. +;; +;; Given the large reorder buffer, the processor can hide most latencies. +;; According to HP, they've got the best results by scheduling for retirement +;; bandwidth with limited latency scheduling for floating point operations. +;; Latency for integer operations and memory references is ignored. +;; +;; +;; We claim floating point operations have a 2 cycle latency and are +;; fully pipelined, except for div and sqrt which are not pipelined and +;; take from 17 to 31 cycles to complete. +;; +;; It's worth noting that there is no way to saturate all the functional +;; units on the PA8000 as there is not enough issue bandwidth. + +(define_automaton "pa8000") +(define_cpu_unit "inm0_8000, inm1_8000, im0_8000, im1_8000" "pa8000") +(define_cpu_unit "rnm0_8000, rnm1_8000, rm0_8000, rm1_8000" "pa8000") +(define_cpu_unit "store_8000" "pa8000") +(define_cpu_unit "f0_8000, f1_8000" "pa8000") +(define_cpu_unit "fdivsqrt0_8000, fdivsqrt1_8000" "pa8000") +(define_reservation "inm_8000" "inm0_8000 | inm1_8000") +(define_reservation "im_8000" "im0_8000 | im1_8000") +(define_reservation "rnm_8000" "rnm0_8000 | rnm1_8000") +(define_reservation "rm_8000" "rm0_8000 | rm1_8000") +(define_reservation "f_8000" "f0_8000 | f1_8000") +(define_reservation "fdivsqrt_8000" "fdivsqrt0_8000 | fdivsqrt1_8000") + +;; We can issue any two memops per cycle, but we can only retire +;; one memory store per cycle. We assume that the reorder buffer +;; will hide any memory latencies per HP's recommendation. +(define_insn_reservation "Z0" 0 + (and + (eq_attr "type" "load,fpload") + (eq_attr "cpu" "8000")) + "im_8000,rm_8000") + +(define_insn_reservation "Z1" 0 + (and + (eq_attr "type" "store,fpstore") + (eq_attr "cpu" "8000")) + "im_8000,rm_8000+store_8000") + +(define_insn_reservation "Z2" 0 + (and (eq_attr "type" "fpstore_load,store_fpload") + (eq_attr "cpu" "8000")) + "im_8000,rm_8000+store_8000,im_8000,rm_8000") + +;; We can issue and retire two non-memory operations per cycle with +;; a few exceptions (branches). This group catches those we want +;; to assume have zero latency. +(define_insn_reservation "Z3" 0 + (and + (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl,fpstore_load,store_fpload") + (eq_attr "cpu" "8000")) + "inm_8000,rnm_8000") + +;; Branches use both slots in the non-memory issue and +;; retirement unit. 
+(define_insn_reservation "Z4" 0 + (and + (eq_attr "type" "uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (eq_attr "cpu" "8000")) + "inm0_8000+inm1_8000,rnm0_8000+rnm1_8000") + +;; We partial latency schedule the floating point units. +;; They can issue/retire two at a time in the non-memory +;; units. We fix their latency at 2 cycles and they +;; are fully pipelined. +(define_insn_reservation "Z5" 1 + (and + (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl") + (eq_attr "cpu" "8000")) + "inm_8000,f_8000,rnm_8000") + +;; The fdivsqrt units are not pipelined and have a very long latency. +;; To keep the DFA from exploding, we do not show all the +;; reservations for the divsqrt unit. +(define_insn_reservation "Z6" 17 + (and + (eq_attr "type" "fpdivsgl,fpsqrtsgl") + (eq_attr "cpu" "8000")) + "inm_8000,fdivsqrt_8000*6,rnm_8000") + +(define_insn_reservation "Z7" 31 + (and + (eq_attr "type" "fpdivdbl,fpsqrtdbl") + (eq_attr "cpu" "8000")) + "inm_8000,fdivsqrt_8000*6,rnm_8000") + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + +;; Compare instructions. +;; This controls RTL generation and register allocation. + +(define_insn "" + [(set (reg:CCFP 0) + (match_operator:CCFP 2 "comparison_operator" + [(match_operand:SF 0 "reg_or_0_operand" "fG") + (match_operand:SF 1 "reg_or_0_operand" "fG")]))] + "! TARGET_SOFT_FLOAT" + "fcmp,sgl,%Y2 %f0,%f1" + [(set_attr "length" "4") + (set_attr "type" "fpcc")]) + +(define_insn "" + [(set (reg:CCFP 0) + (match_operator:CCFP 2 "comparison_operator" + [(match_operand:DF 0 "reg_or_0_operand" "fG") + (match_operand:DF 1 "reg_or_0_operand" "fG")]))] + "! TARGET_SOFT_FLOAT" + "fcmp,dbl,%Y2 %f0,%f1" + [(set_attr "length" "4") + (set_attr "type" "fpcc")]) + +;; Provide a means to emit the movccfp0 and movccfp1 optimization +;; placeholders. This is necessary in rare situations when a +;; placeholder is re-emitted (see PR 8705). + +(define_expand "movccfp" + [(set (reg:CCFP 0) + (match_operand 0 "const_int_operand" ""))] + "! TARGET_SOFT_FLOAT" + " +{ + if ((unsigned HOST_WIDE_INT) INTVAL (operands[0]) > 1) + FAIL; +}") + +;; The following patterns are optimization placeholders. In almost +;; all cases, the user of the condition code will be simplified and the +;; original condition code setting insn should be eliminated. + +(define_insn "*movccfp0" + [(set (reg:CCFP 0) + (const_int 0))] + "! TARGET_SOFT_FLOAT" + "fcmp,dbl,= %%fr0,%%fr0" + [(set_attr "length" "4") + (set_attr "type" "fpcc")]) + +(define_insn "*movccfp1" + [(set (reg:CCFP 0) + (const_int 1))] + "! TARGET_SOFT_FLOAT" + "fcmp,dbl,!= %%fr0,%%fr0" + [(set_attr "length" "4") + (set_attr "type" "fpcc")]) + +;; scc insns. + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_operand:SI 2 "reg_or_0_operand" "") + (match_operand:SI 3 "arith5_operand" "")]))] + "!TARGET_64BIT" + "") + +;; Instruction canonicalization puts immediate operands second, which +;; is the reverse of what we want. 
+ +(define_insn "scc" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 3 "comparison_operator" + [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith11_operand" "rI")]))] + "" + "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi 1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operator:DI 3 "comparison_operator" + [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "arith11_operand" "rI")]))] + "TARGET_64BIT" + "cmp%I2clr,*%B3 %2,%1,%0\;ldi 1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "iorscc" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (match_operator:SI 3 "comparison_operator" + [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith11_operand" "rI")]) + (match_operator:SI 6 "comparison_operator" + [(match_operand:SI 4 "register_operand" "r") + (match_operand:SI 5 "arith11_operand" "rI")])))] + "" + "{com%I2clr|cmp%I2clr},%S3 %2,%1,%%r0\;{com%I5clr|cmp%I5clr},%B6 %5,%4,%0\;ldi 1,%0" + [(set_attr "type" "binary") + (set_attr "length" "12")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (match_operator:DI 3 "comparison_operator" + [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "arith11_operand" "rI")]) + (match_operator:DI 6 "comparison_operator" + [(match_operand:DI 4 "register_operand" "r") + (match_operand:DI 5 "arith11_operand" "rI")])))] + "TARGET_64BIT" + "cmp%I2clr,*%S3 %2,%1,%%r0\;cmp%I5clr,*%B6 %5,%4,%0\;ldi 1,%0" + [(set_attr "type" "binary") + (set_attr "length" "12")]) + +;; Combiner patterns for common operations performed with the output +;; from an scc insn (negscc and incscc). +(define_insn "negscc" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operator:SI 3 "comparison_operator" + [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith11_operand" "rI")])))] + "" + "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi -1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operator:DI 3 "comparison_operator" + [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "arith11_operand" "rI")])))] + "TARGET_64BIT" + "cmp%I2clr,*%B3 %2,%1,%0\;ldi -1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +;; Patterns for adding/subtracting the result of a boolean expression from +;; a register. First we have special patterns that make use of the carry +;; bit, and output only two instructions. For the cases we can't in +;; general do in two instructions, the incscc pattern at the end outputs +;; two or three instructions. 
+ +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (leu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "arith11_operand" "rI")) + (match_operand:SI 1 "register_operand" "r")))] + "" + "sub%I3 %3,%2,%%r0\;{addc|add,c} %%r0,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (leu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "arith11_operand" "rI")) + (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" + "sub%I3 %3,%2,%%r0\;add,dc %%r0,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +; This need only accept registers for op3, since canonicalization +; replaces geu with gtu when op3 is an integer. +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (geu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r")) + (match_operand:SI 1 "register_operand" "r")))] + "" + "sub %2,%3,%%r0\;{addc|add,c} %%r0,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (geu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "register_operand" "r")) + (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" + "sub %2,%3,%%r0\;add,dc %%r0,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +; Match only integers for op3 here. This is used as canonical form of the +; geu pattern when op3 is an integer. Don't match registers since we can't +; make better code than the general incscc pattern. +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (gtu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "int11_operand" "I")) + (match_operand:SI 1 "register_operand" "r")))] + "" + "addi %k3,%2,%%r0\;{addc|add,c} %%r0,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (gtu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "int11_operand" "I")) + (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" + "addi %k3,%2,%%r0\;add,dc %%r0,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "incscc" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operator:SI 4 "comparison_operator" + [(match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "arith11_operand" "rI,rI")]) + (match_operand:SI 1 "register_operand" "0,?r")))] + "" + "@ + {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi 1,%0,%0 + {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi,tr 1,%1,%0\;copy %1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "8,12")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operator:DI 4 "comparison_operator" + [(match_operand:DI 2 "register_operand" "r,r") + (match_operand:DI 3 "arith11_operand" "rI,rI")]) + (match_operand:DI 1 "register_operand" "0,?r")))] + "TARGET_64BIT" + "@ + cmp%I3clr,*%B4 %3,%2,%%r0\;addi 1,%0,%0 + cmp%I3clr,*%B4 %3,%2,%%r0\;addi,tr 1,%1,%0\;copy %1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "8,12")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (gtu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "arith11_operand" 
"rI"))))] + "" + "sub%I3 %3,%2,%%r0\;{subb|sub,b} %1,%%r0,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "register_operand" "r") + (gtu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "arith11_operand" "rI"))))] + "TARGET_64BIT" + "sub%I3 %3,%2,%%r0\;sub,db %1,%%r0,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r") + (gtu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "arith11_operand" "rI"))) + (match_operand:SI 4 "register_operand" "r")))] + "" + "sub%I3 %3,%2,%%r0\;{subb|sub,b} %1,%4,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (minus:DI (match_operand:DI 1 "register_operand" "r") + (gtu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "arith11_operand" "rI"))) + (match_operand:DI 4 "register_operand" "r")))] + "TARGET_64BIT" + "sub%I3 %3,%2,%%r0\;sub,db %1,%4,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +; This need only accept registers for op3, since canonicalization +; replaces ltu with leu when op3 is an integer. +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ltu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r"))))] + "" + "sub %2,%3,%%r0\;{subb|sub,b} %1,%%r0,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "register_operand" "r") + (ltu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "register_operand" "r"))))] + "TARGET_64BIT" + "sub %2,%3,%%r0\;sub,db %1,%%r0,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r") + (ltu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r"))) + (match_operand:SI 4 "register_operand" "r")))] + "" + "sub %2,%3,%%r0\;{subb|sub,b} %1,%4,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (minus:DI (match_operand:DI 1 "register_operand" "r") + (ltu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "register_operand" "r"))) + (match_operand:DI 4 "register_operand" "r")))] + "TARGET_64BIT" + "sub %2,%3,%%r0\;sub,db %1,%4,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +; Match only integers for op3 here. This is used as canonical form of the +; ltu pattern when op3 is an integer. Don't match registers since we can't +; make better code than the general incscc pattern. 
+(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (leu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "int11_operand" "I"))))] + "" + "addi %k3,%2,%%r0\;{subb|sub,b} %1,%%r0,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "register_operand" "r") + (leu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "int11_operand" "I"))))] + "TARGET_64BIT" + "addi %k3,%2,%%r0\;sub,db %1,%%r0,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r") + (leu:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "int11_operand" "I"))) + (match_operand:SI 4 "register_operand" "r")))] + "" + "addi %k3,%2,%%r0\;{subb|sub,b} %1,%4,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (minus:DI (match_operand:DI 1 "register_operand" "r") + (leu:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "int11_operand" "I"))) + (match_operand:DI 4 "register_operand" "r")))] + "TARGET_64BIT" + "addi %k3,%2,%%r0\;sub,db %1,%4,%0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "decscc" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "register_operand" "0,?r") + (match_operator:SI 4 "comparison_operator" + [(match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "arith11_operand" "rI,rI")])))] + "" + "@ + {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi -1,%0,%0 + {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi,tr -1,%1,%0\;copy %1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "8,12")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (minus:DI (match_operand:DI 1 "register_operand" "0,?r") + (match_operator:DI 4 "comparison_operator" + [(match_operand:DI 2 "register_operand" "r,r") + (match_operand:DI 3 "arith11_operand" "rI,rI")])))] + "TARGET_64BIT" + "@ + cmp%I3clr,*%B4 %3,%2,%%r0\;addi -1,%0,%0 + cmp%I3clr,*%B4 %3,%2,%%r0\;addi,tr -1,%1,%0\;copy %1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "8,12")]) + +; Patterns for max and min. (There is no need for an earlyclobber in the +; last alternative since the middle alternative will match if op0 == op1.) 
+ +(define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (smin:SI (match_operand:SI 1 "register_operand" "%0,0,r") + (match_operand:SI 2 "arith11_operand" "r,I,M")))] + "" + "@ + {comclr|cmpclr},> %2,%0,%%r0\;copy %2,%0 + {comiclr|cmpiclr},> %2,%0,%%r0\;ldi %2,%0 + {comclr|cmpclr},> %1,%r2,%0\;copy %1,%0" +[(set_attr "type" "multi,multi,multi") + (set_attr "length" "8,8,8")]) + +(define_insn "smindi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (smin:DI (match_operand:DI 1 "register_operand" "%0,0,r") + (match_operand:DI 2 "arith11_operand" "r,I,M")))] + "TARGET_64BIT" + "@ + cmpclr,*> %2,%0,%%r0\;copy %2,%0 + cmpiclr,*> %2,%0,%%r0\;ldi %2,%0 + cmpclr,*> %1,%r2,%0\;copy %1,%0" +[(set_attr "type" "multi,multi,multi") + (set_attr "length" "8,8,8")]) + +(define_insn "uminsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (umin:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "arith11_operand" "r,I")))] + "" + "@ + {comclr|cmpclr},>> %2,%0,%%r0\;copy %2,%0 + {comiclr|cmpiclr},>> %2,%0,%%r0\;ldi %2,%0" +[(set_attr "type" "multi,multi") + (set_attr "length" "8,8")]) + +(define_insn "umindi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (umin:DI (match_operand:DI 1 "register_operand" "%0,0") + (match_operand:DI 2 "arith11_operand" "r,I")))] + "TARGET_64BIT" + "@ + cmpclr,*>> %2,%0,%%r0\;copy %2,%0 + cmpiclr,*>> %2,%0,%%r0\;ldi %2,%0" +[(set_attr "type" "multi,multi") + (set_attr "length" "8,8")]) + +(define_insn "smaxsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (smax:SI (match_operand:SI 1 "register_operand" "%0,0,r") + (match_operand:SI 2 "arith11_operand" "r,I,M")))] + "" + "@ + {comclr|cmpclr},< %2,%0,%%r0\;copy %2,%0 + {comiclr|cmpiclr},< %2,%0,%%r0\;ldi %2,%0 + {comclr|cmpclr},< %1,%r2,%0\;copy %1,%0" +[(set_attr "type" "multi,multi,multi") + (set_attr "length" "8,8,8")]) + +(define_insn "smaxdi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (smax:DI (match_operand:DI 1 "register_operand" "%0,0,r") + (match_operand:DI 2 "arith11_operand" "r,I,M")))] + "TARGET_64BIT" + "@ + cmpclr,*< %2,%0,%%r0\;copy %2,%0 + cmpiclr,*< %2,%0,%%r0\;ldi %2,%0 + cmpclr,*< %1,%r2,%0\;copy %1,%0" +[(set_attr "type" "multi,multi,multi") + (set_attr "length" "8,8,8")]) + +(define_insn "umaxsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (umax:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "arith11_operand" "r,I")))] + "" + "@ + {comclr|cmpclr},<< %2,%0,%%r0\;copy %2,%0 + {comiclr|cmpiclr},<< %2,%0,%%r0\;ldi %2,%0" +[(set_attr "type" "multi,multi") + (set_attr "length" "8,8")]) + +(define_insn "umaxdi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (umax:DI (match_operand:DI 1 "register_operand" "%0,0") + (match_operand:DI 2 "arith11_operand" "r,I")))] + "TARGET_64BIT" + "@ + cmpclr,*<< %2,%0,%%r0\;copy %2,%0 + cmpiclr,*<< %2,%0,%%r0\;ldi %2,%0" +[(set_attr "type" "multi,multi") + (set_attr "length" "8,8")]) + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "or,>= %%r0,%1,%0\;subi 0,%0,%0" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "absdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (abs:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" + "or,*>= %%r0,%1,%0\;subi 0,%0,%0" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +;;; Experimental conditional move patterns + +(define_expand "movsicc" 
+ [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI + (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "reg_or_cint_move_operand" "") + (match_operand:SI 3 "reg_or_cint_move_operand" "")))] + "" + " +{ + if (GET_MODE (XEXP (operands[1], 0)) != SImode + || GET_MODE (XEXP (operands[1], 0)) != GET_MODE (XEXP (operands[1], 1))) + FAIL; +}") + +;; We used to accept any register for op1. +;; +;; However, it loses sometimes because the compiler will end up using +;; different registers for op0 and op1 in some critical cases. local-alloc +;; will not tie op0 and op1 because op0 is used in multiple basic blocks. +;; +;; If/when global register allocation supports tying we should allow any +;; register for op1 again. +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 2 "comparison_operator" + [(match_operand:SI 3 "register_operand" "r,r,r,r") + (match_operand:SI 4 "arith11_operand" "rI,rI,rI,rI")]) + (match_operand:SI 1 "reg_or_cint_move_operand" "0,J,N,K") + (const_int 0)))] + "" + "@ + {com%I4clr|cmp%I4clr},%S2 %4,%3,%%r0\;ldi 0,%0 + {com%I4clr|cmp%I4clr},%B2 %4,%3,%0\;ldi %1,%0 + {com%I4clr|cmp%I4clr},%B2 %4,%3,%0\;ldil L'%1,%0 + {com%I4clr|cmp%I4clr},%B2 %4,%3,%0\;{zdepi|depwi,z} %Z1,%0" + [(set_attr "type" "multi,multi,multi,nullshift") + (set_attr "length" "8,8,8,8")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 5 "comparison_operator" + [(match_operand:SI 3 "register_operand" "r,r,r,r,r,r,r,r") + (match_operand:SI 4 "arith11_operand" "rI,rI,rI,rI,rI,rI,rI,rI")]) + (match_operand:SI 1 "reg_or_cint_move_operand" "0,0,0,0,r,J,N,K") + (match_operand:SI 2 "reg_or_cint_move_operand" "r,J,N,K,0,0,0,0")))] + "" + "@ + {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;copy %2,%0 + {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;ldi %2,%0 + {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;ldil L'%2,%0 + {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;{zdepi|depwi,z} %Z2,%0 + {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;copy %1,%0 + {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;ldi %1,%0 + {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;ldil L'%1,%0 + {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;{zdepi|depwi,z} %Z1,%0" + [(set_attr "type" "multi,multi,multi,nullshift,multi,multi,multi,nullshift") + (set_attr "length" "8,8,8,8,8,8,8,8")]) + +(define_expand "movdicc" + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI + (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "reg_or_cint_move_operand" "") + (match_operand:DI 3 "reg_or_cint_move_operand" "")))] + "TARGET_64BIT" + " +{ + if (GET_MODE (XEXP (operands[1], 0)) != DImode + || GET_MODE (XEXP (operands[1], 0)) != GET_MODE (XEXP (operands[1], 1))) + FAIL; +}") + +; We need the first constraint alternative in order to avoid +; earlyclobbers on all other alternatives. 
+(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (if_then_else:DI + (match_operator 2 "comparison_operator" + [(match_operand:DI 3 "register_operand" "r,r,r,r,r") + (match_operand:DI 4 "arith11_operand" "rI,rI,rI,rI,rI")]) + (match_operand:DI 1 "reg_or_cint_move_operand" "0,r,J,N,K") + (const_int 0)))] + "TARGET_64BIT" + "@ + cmp%I4clr,*%S2 %4,%3,%%r0\;ldi 0,%0 + cmp%I4clr,*%B2 %4,%3,%0\;copy %1,%0 + cmp%I4clr,*%B2 %4,%3,%0\;ldi %1,%0 + cmp%I4clr,*%B2 %4,%3,%0\;ldil L'%1,%0 + cmp%I4clr,*%B2 %4,%3,%0\;depdi,z %z1,%0" + [(set_attr "type" "multi,multi,multi,multi,nullshift") + (set_attr "length" "8,8,8,8,8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r,r") + (if_then_else:DI + (match_operator 5 "comparison_operator" + [(match_operand:DI 3 "register_operand" "r,r,r,r,r,r,r,r") + (match_operand:DI 4 "arith11_operand" "rI,rI,rI,rI,rI,rI,rI,rI")]) + (match_operand:DI 1 "reg_or_cint_move_operand" "0,0,0,0,r,J,N,K") + (match_operand:DI 2 "reg_or_cint_move_operand" "r,J,N,K,0,0,0,0")))] + "TARGET_64BIT" + "@ + cmp%I4clr,*%S5 %4,%3,%%r0\;copy %2,%0 + cmp%I4clr,*%S5 %4,%3,%%r0\;ldi %2,%0 + cmp%I4clr,*%S5 %4,%3,%%r0\;ldil L'%2,%0 + cmp%I4clr,*%S5 %4,%3,%%r0\;depdi,z %z2,%0 + cmp%I4clr,*%B5 %4,%3,%%r0\;copy %1,%0 + cmp%I4clr,*%B5 %4,%3,%%r0\;ldi %1,%0 + cmp%I4clr,*%B5 %4,%3,%%r0\;ldil L'%1,%0 + cmp%I4clr,*%B5 %4,%3,%%r0\;depdi,z %z1,%0" + [(set_attr "type" "multi,multi,multi,nullshift,multi,multi,multi,nullshift") + (set_attr "length" "8,8,8,8,8,8,8,8")]) + +;; Conditional Branches + +(define_expand "cbranchdi4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "register_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_64BIT" + "") + +(define_expand "cbranchsi4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:SI 1 "reg_or_0_operand" "") + (match_operand:SI 2 "arith5_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +(define_expand "cbranchsf4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:SF 1 "reg_or_0_operand" "") + (match_operand:SF 2 "reg_or_0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " +{ + emit_bcond_fp (operands); + DONE; +}") + + +(define_expand "cbranchdf4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:DF 1 "reg_or_0_operand" "") + (match_operand:DF 2 "reg_or_0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " +{ + emit_bcond_fp (operands); + DONE; +}") + +;; Match the branch patterns. + + +;; Note a long backward conditional branch with an annulled delay slot +;; has a length of 12. 
+(define_insn "" + [(set (pc) + (if_then_else + (match_operator 3 "comparison_operator" + [(match_operand:SI 1 "reg_or_0_operand" "rM") + (match_operand:SI 2 "arith5_operand" "rL")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* +{ + return output_cbranch (operands, 0, insn); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +;; Match the negated branch. + +(define_insn "" + [(set (pc) + (if_then_else + (match_operator 3 "comparison_operator" + [(match_operand:SI 1 "reg_or_0_operand" "rM") + (match_operand:SI 2 "arith5_operand" "rL")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "* +{ + return output_cbranch (operands, 1, insn); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (match_operator 3 "comparison_operator" + [(match_operand:DI 1 "reg_or_0_operand" "rM") + (match_operand:DI 2 "reg_or_0_operand" "rM")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_64BIT" + "* +{ + return output_cbranch (operands, 0, insn); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +;; Match the negated branch. 
+ +(define_insn "" + [(set (pc) + (if_then_else + (match_operator 3 "comparison_operator" + [(match_operand:DI 1 "reg_or_0_operand" "rM") + (match_operand:DI 2 "reg_or_0_operand" "rM")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "TARGET_64BIT" + "* +{ + return output_cbranch (operands, 1, insn); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) +(define_insn "" + [(set (pc) + (if_then_else + (match_operator 3 "cmpib_comparison_operator" + [(match_operand:DI 1 "reg_or_0_operand" "rM") + (match_operand:DI 2 "arith5_operand" "rL")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_64BIT" + "* +{ + return output_cbranch (operands, 0, insn); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +;; Match the negated branch. + +(define_insn "" + [(set (pc) + (if_then_else + (match_operator 3 "cmpib_comparison_operator" + [(match_operand:DI 1 "reg_or_0_operand" "rM") + (match_operand:DI 2 "arith5_operand" "rL")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "TARGET_64BIT" + "* +{ + return output_cbranch (operands, 1, insn); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +;; Branch on Bit patterns. 
+(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "uint5_operand" "")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "* +{ + return output_bb (operands, 0, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "uint32_operand" "")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_64BIT" + "* +{ + return output_bb (operands, 0, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "uint5_operand" "")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" + "* +{ + return output_bb (operands, 1, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "uint32_operand" "")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "TARGET_64BIT" + "* +{ + return output_bb (operands, 1, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "uint5_operand" "")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "* +{ + return output_bb (operands, 0, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne 
(symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "uint32_operand" "")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_64BIT" + "* +{ + return output_bb (operands, 0, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "uint5_operand" "")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" + "* +{ + return output_bb (operands, 1, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "uint32_operand" "")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "TARGET_64BIT" + "* +{ + return output_bb (operands, 1, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +;; Branch on Variable Bit patterns. 
+(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "register_operand" "q")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "* +{ + return output_bvb (operands, 0, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "register_operand" "q")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_64BIT" + "* +{ + return output_bvb (operands, 0, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "register_operand" "q")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" + "* +{ + return output_bvb (operands, 1, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "register_operand" "q")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "TARGET_64BIT" + "* +{ + return output_bvb (operands, 1, insn, 0); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "register_operand" "q")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "* +{ + return output_bvb (operands, 0, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + 
(const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "register_operand" "q")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_64BIT" + "* +{ + return output_bvb (operands, 0, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "register_operand" "q")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" + "* +{ + return output_bvb (operands, 1, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "register_operand" "q")) + (const_int 0)) + (pc) + (label_ref (match_operand 2 "" ""))))] + "TARGET_64BIT" + "* +{ + return output_bvb (operands, 1, insn, 1); +}" +[(set_attr "type" "cbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +;; Floating point branches + +;; ??? Nullification is handled differently from other branches. +;; If nullification is specified, the delay slot is nullified on any +;; taken branch regardless of branch direction. 
+(define_insn "" + [(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "!TARGET_SOFT_FLOAT" + "* +{ + int length = get_attr_length (insn); + rtx xoperands[1]; + int nullify, xdelay; + + if (length < 16) + return \"ftest\;b%* %l0\"; + + if (dbr_sequence_length () == 0 || INSN_ANNULLED_BRANCH_P (insn)) + { + nullify = 1; + xdelay = 0; + xoperands[0] = GEN_INT (length - 8); + } + else + { + nullify = 0; + xdelay = 1; + xoperands[0] = GEN_INT (length - 4); + } + + if (nullify) + output_asm_insn (\"ftest\;add,tr %%r0,%%r0,%%r0\;b,n .+%0\", xoperands); + else + output_asm_insn (\"ftest\;add,tr %%r0,%%r0,%%r0\;b .+%0\", xoperands); + return output_lbranch (operands[0], insn, xdelay); +}" +[(set_attr "type" "fbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 32) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 28)] + (const_int 36)))]) + +(define_insn "" + [(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "!TARGET_SOFT_FLOAT" + "* +{ + int length = get_attr_length (insn); + rtx xoperands[1]; + int nullify, xdelay; + + if (length < 16) + return \"ftest\;add,tr %%r0,%%r0,%%r0\;b%* %0\"; + + if (dbr_sequence_length () == 0 || INSN_ANNULLED_BRANCH_P (insn)) + { + nullify = 1; + xdelay = 0; + xoperands[0] = GEN_INT (length - 4); + } + else + { + nullify = 0; + xdelay = 1; + xoperands[0] = GEN_INT (length); + } + + if (nullify) + output_asm_insn (\"ftest\;b,n .+%0\", xoperands); + else + output_asm_insn (\"ftest\;b .+%0\", xoperands); + return output_lbranch (operands[0], insn, xdelay); +}" +[(set_attr "type" "fbranch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 12) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 28) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 24)] + (const_int 32)))]) + +;; Move instructions + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + " +{ + if (emit_move_sequence (operands, SImode, 0)) + DONE; +}") + +;; Handle SImode input reloads requiring %r1 as a scratch register. +(define_expand "reload_insi_r1" + [(set (match_operand:SI 0 "register_operand" "=Z") + (match_operand:SI 1 "non_hard_reg_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "=&a"))] + "" + " +{ + if (emit_move_sequence (operands, SImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle SImode input reloads requiring a general register as a +;; scratch register. +(define_expand "reload_insi" + [(set (match_operand:SI 0 "register_operand" "=Z") + (match_operand:SI 1 "non_hard_reg_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, SImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle SImode output reloads requiring a general register as a +;; scratch register. 
+(define_expand "reload_outsi" + [(set (match_operand:SI 0 "non_hard_reg_operand" "") + (match_operand:SI 1 "register_operand" "Z")) + (clobber (match_operand:SI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, SImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +(define_insn "" + [(set (match_operand:SI 0 "move_dest_operand" + "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T,?r,?*f") + (match_operand:SI 1 "move_src_operand" + "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f,*f,r"))] + "(register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode)) + && !TARGET_SOFT_FLOAT + && !TARGET_64BIT" + "@ + ldw RT'%A1,%0 + copy %1,%0 + ldi %1,%0 + ldil L'%1,%0 + {zdepi|depwi,z} %Z1,%0 + ldw%M1 %1,%0 + stw%M0 %r1,%0 + mtsar %r1 + {mfctl|mfctl,w} %%sar,%0 + fcpy,sgl %f1,%0 + fldw%F1 %1,%0 + fstw%F0 %1,%0 + {fstws|fstw} %1,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0 + {stws|stw} %1,-16(%%sp)\n\t{fldws|fldw} -16(%%sp),%0" + [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore,fpstore_load,store_fpload") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8,8")]) + +(define_insn "" + [(set (match_operand:SI 0 "move_dest_operand" + "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T") + (match_operand:SI 1 "move_src_operand" + "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f"))] + "(register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode)) + && !TARGET_SOFT_FLOAT + && TARGET_64BIT" + "@ + ldw RT'%A1,%0 + copy %1,%0 + ldi %1,%0 + ldil L'%1,%0 + {zdepi|depwi,z} %Z1,%0 + ldw%M1 %1,%0 + stw%M0 %r1,%0 + mtsar %r1 + {mfctl|mfctl,w} %%sar,%0 + fcpy,sgl %f1,%0 + fldw%F1 %1,%0 + fstw%F0 %1,%0" + [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4")]) + +(define_insn "" + [(set (match_operand:SI 0 "indexed_memory_operand" "=R") + (match_operand:SI 1 "register_operand" "f"))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && reload_completed" + "fstw%F0 %1,%0" + [(set_attr "type" "fpstore") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4")]) + +; Rewrite RTL using an indexed store. This will allow the insn that +; computes the address to be deleted if the register it sets is dead. 
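For illustration, the first peephole2 below performs the following rewrite, shown as simplified RTL in the file's own comment notation; the pseudo-register numbers are hypothetical, and reg 103 is assumed to satisfy FP_REGNO_P as the condition requires. After the rewrite the address computation trails the store, so a later pass can delete it whenever reg 100 is dead:

    ;; before:
    ;;   (set (reg:SI 100) (plus:SI (mult:SI (reg:SI 101) (const_int 4)) (reg:SI 102)))
    ;;   (set (mem:SI (reg:SI 100)) (reg:SI 103))
    ;;
    ;; after:
    ;;   (set (mem:SI (plus:SI (mult:SI (reg:SI 101) (const_int 4)) (reg:SI 102))) (reg:SI 103))
    ;;   (set (reg:SI 100) (plus:SI (mult:SI (reg:SI 101) (const_int 4)) (reg:SI 102)))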
+(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "") + (const_int 4)) + (match_operand:SI 2 "register_operand" ""))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 2 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (const_int 4)))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 4)) + (match_operand:DI 2 "register_operand" ""))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 2 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 4)))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_INDEX_P (operands[1]) + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:SI (match_dup 1) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_BASE_P (operands[1]) + && REG_OK_FOR_INDEX_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:SI (match_dup 2) (match_dup 1))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (match_dup 2) 
(match_dup 1)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_INDEX_P (operands[1]) + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:DI (match_dup 1) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_BASE_P (operands[1]) + && REG_OK_FOR_INDEX_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SI (plus:DI (match_dup 2) (match_dup 1))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))] + "") + +(define_insn "" + [(set (match_operand:SI 0 "move_dest_operand" + "=r,r,r,r,r,r,Q,!*q,!r") + (match_operand:SI 1 "move_src_operand" + "A,r,J,N,K,RQ,rM,!rM,!*q"))] + "(register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode)) + && TARGET_SOFT_FLOAT" + "@ + ldw RT'%A1,%0 + copy %1,%0 + ldi %1,%0 + ldil L'%1,%0 + {zdepi|depwi,z} %Z1,%0 + ldw%M1 %1,%0 + stw%M0 %r1,%0 + mtsar %r1 + {mfctl|mfctl,w} %%sar,%0" + [(set_attr "type" "load,move,move,move,move,load,store,move,move") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,4,4,4")]) + +;; Load or store with base-register modification. +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:DI (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L")))) + (set (match_dup 1) + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldw,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +; And a zero extended variant. 
+(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (mem:SI + (plus:DI + (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L"))))) + (set (match_dup 1) + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldw,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_expand "pre_load" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mem (plus (match_operand 1 "register_operand" "") + (match_operand 2 "pre_cint_operand" "")))) + (set (match_dup 1) + (plus (match_dup 1) (match_dup 2)))])] + "" + " +{ + if (TARGET_64BIT) + { + emit_insn (gen_pre_ldd (operands[0], operands[1], operands[2])); + DONE; + } + emit_insn (gen_pre_ldw (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "pre_ldw" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "+r") + (match_operand:SI 2 "pre_cint_operand" "")))) + (set (match_dup 1) + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "* +{ + if (INTVAL (operands[2]) < 0) + return \"{ldwm|ldw,mb} %2(%1),%0\"; + return \"{ldws|ldw},mb %2(%1),%0\"; +}" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "pre_ldd" + [(set (match_operand:DI 0 "register_operand" "=r") + (mem:DI (plus:DI (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "pre_cint_operand" "")))) + (set (match_dup 1) + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldd,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "pre_cint_operand" ""))) + (match_operand:SI 2 "reg_or_0_operand" "rM")) + (set (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))] + "" + "* +{ + if (INTVAL (operands[1]) < 0) + return \"{stwm|stw,mb} %r2,%1(%0)\"; + return \"{stws|stw},mb %r2,%1(%0)\"; +}" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (match_operand:SI 1 "register_operand" "+r"))) + (set (match_dup 1) + (plus:SI (match_dup 1) + (match_operand:SI 2 "post_cint_operand" "")))] + "" + "* +{ + if (INTVAL (operands[2]) > 0) + return \"{ldwm|ldw,ma} %2(%1),%0\"; + return \"{ldws|ldw},ma %2(%1),%0\"; +}" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_expand "post_store" + [(parallel [(set (mem (match_operand 0 "register_operand" "")) + (match_operand 1 "reg_or_0_operand" "")) + (set (match_dup 0) + (plus (match_dup 0) + (match_operand 2 "post_cint_operand" "")))])] + "" + " +{ + if (TARGET_64BIT) + { + emit_insn (gen_post_std (operands[0], operands[1], operands[2])); + DONE; + } + emit_insn (gen_post_stw (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "post_stw" + [(set (mem:SI (match_operand:SI 0 "register_operand" "+r")) + (match_operand:SI 1 "reg_or_0_operand" "rM")) + (set (match_dup 0) + (plus:SI (match_dup 0) + (match_operand:SI 2 "post_cint_operand" "")))] + "" + "* +{ + if (INTVAL (operands[2]) > 0) + return \"{stwm|stw,ma} %r1,%2(%0)\"; + return \"{stws|stw},ma %r1,%2(%0)\"; +}" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "post_std" + [(set (mem:DI (match_operand:DI 0 "register_operand" "+r")) + (match_operand:DI 1 "reg_or_0_operand" "rM")) + (set (match_dup 0) + (plus:DI (match_dup 0) + (match_operand:DI 2 "post_cint_operand" "")))] + "TARGET_64BIT" + "std,ma %r1,%2(%0)" + [(set_attr 
"type" "store") + (set_attr "length" "4")]) + +;; For loading the address of a label while generating PIC code. +;; Note since this pattern can be created at reload time (via movsi), all +;; the same rules for movsi apply here. (no new pseudos, no temporaries). +(define_insn "" + [(set (match_operand 0 "pmode_register_operand" "=a") + (match_operand 1 "pic_label_operand" ""))] + "TARGET_PA_20" + "* +{ + rtx xoperands[3]; + + xoperands[0] = operands[0]; + xoperands[1] = operands[1]; + xoperands[2] = gen_label_rtx (); + + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", + CODE_LABEL_NUMBER (xoperands[2])); + output_asm_insn (\"mfia %0\", xoperands); + + /* If we're trying to load the address of a label that happens to be + close, then we can use a shorter sequence. */ + if (GET_CODE (operands[1]) == LABEL_REF + && !LABEL_REF_NONLOCAL_P (operands[1]) + && INSN_ADDRESSES_SET_P () + && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0))) + - INSN_ADDRESSES (INSN_UID (insn))) < 8100) + output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands); + else + { + output_asm_insn (\"addil L%%%1-%2,%0\", xoperands); + output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands); + } + return \"\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "12")]) ; 8 or 12 + +(define_insn "" + [(set (match_operand 0 "pmode_register_operand" "=a") + (match_operand 1 "pic_label_operand" ""))] + "!TARGET_PA_20" + "* +{ + rtx xoperands[3]; + + xoperands[0] = operands[0]; + xoperands[1] = operands[1]; + xoperands[2] = gen_label_rtx (); + + output_asm_insn (\"bl .+8,%0\", xoperands); + output_asm_insn (\"depi 0,31,2,%0\", xoperands); + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", + CODE_LABEL_NUMBER (xoperands[2])); + + /* If we're trying to load the address of a label that happens to be + close, then we can use a shorter sequence. */ + if (GET_CODE (operands[1]) == LABEL_REF + && !LABEL_REF_NONLOCAL_P (operands[1]) + && INSN_ADDRESSES_SET_P () + && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0))) + - INSN_ADDRESSES (INSN_UID (insn))) < 8100) + output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands); + else + { + output_asm_insn (\"addil L%%%1-%2,%0\", xoperands); + output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands); + } + return \"\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "16")]) ; 12 or 16 + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (high:SI (match_operand 2 "" ""))))] + "symbolic_operand (operands[2], Pmode) + && ! function_label_operand (operands[2], Pmode) + && flag_pic" + "addil LT'%G2,%1" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=a") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand 2 "" ""))))] + "symbolic_operand (operands[2], Pmode) + && ! function_label_operand (operands[2], Pmode) + && TARGET_64BIT + && flag_pic" + "addil LT'%G2,%1" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +;; Always use addil rather than ldil;add sequences. This allows the +;; HP linker to eliminate the dp relocation if the symbolic operand +;; lives in the TEXT space. +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (high:SI (match_operand 1 "" "")))] + "symbolic_operand (operands[1], Pmode) + && ! function_label_operand (operands[1], Pmode) + && ! read_only_operand (operands[1], Pmode) + && ! 
flag_pic" + "* +{ + if (TARGET_LONG_LOAD_STORE) + return \"addil NLR'%H1,%%r27\;ldo N'%H1(%%r1),%%r1\"; + else + return \"addil LR'%H1,%%r27\"; +}" + [(set_attr "type" "binary") + (set (attr "length") + (if_then_else (eq (symbol_ref "TARGET_LONG_LOAD_STORE") (const_int 0)) + (const_int 4) + (const_int 8)))]) + + +;; This is for use in the prologue/epilogue code. We need it +;; to add large constants to a stack pointer or frame pointer. +;; Because of the additional %r1 pressure, we probably do not +;; want to use this in general code, so make it available +;; only after reload. +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=!a,*r") + (plus:SI (match_operand:SI 1 "register_operand" "r,r") + (high:SI (match_operand 2 "const_int_operand" ""))))] + "reload_completed" + "@ + addil L'%G2,%1 + ldil L'%G2,%0\;{addl|add,l} %0,%1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "4,8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=!a,*r") + (plus:DI (match_operand:DI 1 "register_operand" "r,r") + (high:DI (match_operand 2 "const_int_operand" ""))))] + "reload_completed && TARGET_64BIT" + "@ + addil L'%G2,%1 + ldil L'%G2,%0\;{addl|add,l} %0,%1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "4,8")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (match_operand 1 "" "")))] + "(!flag_pic || !symbolic_operand (operands[1], Pmode)) + && !is_function_label_plus_const (operands[1])" + "* +{ + if (symbolic_operand (operands[1], Pmode)) + return \"ldil LR'%H1,%0\"; + else + return \"ldil L'%G1,%0\"; +}" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (match_operand 1 "const_int_operand" "")))] + "TARGET_64BIT" + "ldil L'%G1,%0"; + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "i")))] + "TARGET_64BIT" + "ldo R'%G2(%1),%0"; + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")))] + "!is_function_label_plus_const (operands[2])" + "* +{ + gcc_assert (!flag_pic || !symbolic_operand (operands[2], Pmode)); + + if (symbolic_operand (operands[2], Pmode)) + return \"ldo RR'%G2(%1),%0\"; + else + return \"ldo R'%G2(%1),%0\"; +}" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +;; Now that a symbolic_address plus a constant is broken up early +;; in the compilation phase (for better CSE) we need a special +;; combiner pattern to load the symbolic address plus the constant +;; in only 2 instructions. (For cases where the symbolic address +;; was not a common subexpression.) +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "symbolic_operand" "")) + (clobber (match_operand:SI 2 "register_operand" ""))] + "! (flag_pic && pic_label_operand (operands[1], SImode))" + [(set (match_dup 2) (high:SI (match_dup 1))) + (set (match_dup 0) (lo_sum:SI (match_dup 2) (match_dup 1)))] + "") + +;; hppa_legitimize_address goes to a great deal of trouble to +;; create addresses which use indexing. In some cases, this +;; is a lose because there isn't any store instructions which +;; allow indexed addresses (with integer register source). 
+;; +;; These define_splits try to turn a 3 insn store into +;; a 2 insn store with some creative RTL rewriting. +(define_split + [(set (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "shadd_operand" "")) + (plus:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")))) + (match_operand:SI 4 "register_operand" "")) + (clobber (match_operand:SI 5 "register_operand" ""))] + "" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (mem:SI (plus:SI (match_dup 5) (match_dup 3))) (match_dup 4))] + "") + +(define_split + [(set (mem:HI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "shadd_operand" "")) + (plus:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")))) + (match_operand:HI 4 "register_operand" "")) + (clobber (match_operand:SI 5 "register_operand" ""))] + "" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (mem:HI (plus:SI (match_dup 5) (match_dup 3))) (match_dup 4))] + "") + +(define_split + [(set (mem:QI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "shadd_operand" "")) + (plus:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")))) + (match_operand:QI 4 "register_operand" "")) + (clobber (match_operand:SI 5 "register_operand" ""))] + "" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (mem:QI (plus:SI (match_dup 5) (match_dup 3))) (match_dup 4))] + "") + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + if (emit_move_sequence (operands, HImode, 0)) + DONE; +}") + +;; Handle HImode input reloads requiring a general register as a +;; scratch register. +(define_expand "reload_inhi" + [(set (match_operand:HI 0 "register_operand" "=Z") + (match_operand:HI 1 "non_hard_reg_operand" "")) + (clobber (match_operand:HI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, HImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle HImode output reloads requiring a general register as a +;; scratch register. +(define_expand "reload_outhi" + [(set (match_operand:HI 0 "non_hard_reg_operand" "") + (match_operand:HI 1 "register_operand" "Z")) + (clobber (match_operand:HI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, HImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +(define_insn "" + [(set (match_operand:HI 0 "move_dest_operand" + "=r,r,r,r,r,Q,!*q,!r") + (match_operand:HI 1 "move_src_operand" + "r,J,N,K,RQ,rM,!rM,!*q"))] + "(register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode))" + "@ + copy %1,%0 + ldi %1,%0 + ldil L'%1,%0 + {zdepi|depwi,z} %Z1,%0 + ldh%M1 %1,%0 + sth%M0 %r1,%0 + mtsar %r1 + {mfctl|mfctl,w} %sar,%0" + [(set_attr "type" "move,move,move,shift,load,store,move,move") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,4,4")]) + +(define_insn "" + [(set (match_operand:HI 0 "register_operand" "=r") + (mem:HI (plus:SI (match_operand:SI 1 "register_operand" "+r") + (match_operand:SI 2 "int5_operand" "L")))) + (set (match_dup 1) + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "{ldhs|ldh},mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:HI 0 "register_operand" "=r") + (mem:HI (plus:DI (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L")))) + (set (match_dup 1) + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldh,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +; And a zero extended variant. +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (mem:HI + (plus:DI + (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L"))))) + (set (match_dup 1) + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldh,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:HI + (plus:SI + (match_operand:SI 1 "register_operand" "+r") + (match_operand:SI 2 "int5_operand" "L"))))) + (set (match_dup 1) + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "{ldhs|ldh},mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:HI + (plus:DI + (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L"))))) + (set (match_dup 1) + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldh,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (mem:HI (plus:SI (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "int5_operand" "L"))) + (match_operand:HI 2 "reg_or_0_operand" "rM")) + (set (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))] + "" + "{sths|sth},mb %r2,%1(%0)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "" + [(set (mem:HI (plus:DI (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "int5_operand" "L"))) + (match_operand:HI 2 "reg_or_0_operand" "rM")) + (set (match_dup 0) + (plus:DI (match_dup 0) (match_dup 1)))] + "TARGET_64BIT" + "sth,mb %r2,%1(%0)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "addhi3" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (plus:HI (match_operand:HI 1 "register_operand" "%r,r") + (match_operand:HI 2 "arith_operand" "r,J")))] + "" + "@ + {addl|add,l} %1,%2,%0 + ldo %2(%1),%0" + [(set_attr "type" "binary,binary") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4")]) + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + if 
(emit_move_sequence (operands, QImode, 0)) + DONE; +}") + +;; Handle QImode input reloads requiring a general register as a +;; scratch register. +(define_expand "reload_inqi" + [(set (match_operand:QI 0 "register_operand" "=Z") + (match_operand:QI 1 "non_hard_reg_operand" "")) + (clobber (match_operand:QI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, QImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle QImode output reloads requiring a general register as a +;; scratch register. +(define_expand "reload_outqi" + [(set (match_operand:QI 0 "non_hard_reg_operand" "") + (match_operand:QI 1 "register_operand" "Z")) + (clobber (match_operand:QI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, QImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +(define_insn "" + [(set (match_operand:QI 0 "move_dest_operand" + "=r,r,r,r,r,Q,!*q,!r") + (match_operand:QI 1 "move_src_operand" + "r,J,N,K,RQ,rM,!rM,!*q"))] + "(register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode))" + "@ + copy %1,%0 + ldi %1,%0 + ldil L'%1,%0 + {zdepi|depwi,z} %Z1,%0 + ldb%M1 %1,%0 + stb%M0 %r1,%0 + mtsar %r1 + {mfctl|mfctl,w} %%sar,%0" + [(set_attr "type" "move,move,move,shift,load,store,move,move") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,4,4")]) + +(define_insn "" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (plus:SI (match_operand:SI 1 "register_operand" "+r") + (match_operand:SI 2 "int5_operand" "L")))) + (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))] + "" + "{ldbs|ldb},mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L")))) + (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldb,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +; Now the same thing with zero extensions. 
+(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (mem:QI (plus:DI + (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L"))))) + (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldb,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:QI (plus:SI + (match_operand:SI 1 "register_operand" "+r") + (match_operand:SI 2 "int5_operand" "L"))))) + (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))] + "" + "{ldbs|ldb},mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:QI (plus:DI + (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L"))))) + (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldb,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (mem:QI (plus:SI + (match_operand:SI 1 "register_operand" "+r") + (match_operand:SI 2 "int5_operand" "L"))))) + (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))] + "" + "{ldbs|ldb},mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (mem:QI (plus:DI + (match_operand:DI 1 "register_operand" "+r") + (match_operand:DI 2 "int5_operand" "L"))))) + (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT" + "ldb,mb %2(%1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (mem:QI (plus:SI (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "int5_operand" "L"))) + (match_operand:QI 2 "reg_or_0_operand" "rM")) + (set (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))] + "" + "{stbs|stb},mb %r2,%1(%0)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "" + [(set (mem:QI (plus:DI (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "int5_operand" "L"))) + (match_operand:QI 2 "reg_or_0_operand" "rM")) + (set (match_dup 0) + (plus:DI (match_dup 0) (match_dup 1)))] + "TARGET_64BIT" + "stb,mb %r2,%1(%0)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +;; The definition of this insn does not really explain what it does, +;; but it should suffice that anything generated as this insn will be +;; recognized as a movmemsi operation, and that it will not successfully +;; combine with anything. +(define_expand "movmemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_operand:SI 2 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "!TARGET_64BIT && optimize > 0" + " +{ + int size, align; + + /* HP provides very fast block move library routine for the PA; + this routine includes: + + 4x4 byte at a time block moves, + 1x4 byte at a time with alignment checked at runtime with + attempts to align the source and destination as needed + 1x1 byte loop + + With that in mind, here's the heuristics to try and guess when + the inlined block move will be better than the library block + move: + + If the size isn't constant, then always use the library routines. 
+ + If the size is large in respect to the known alignment, then use + the library routines. + + If the size is small in respect to the known alignment, then open + code the copy (since that will lead to better scheduling). + + Else use the block move pattern. */ + + /* Undetermined size, use the library routine. */ + if (GET_CODE (operands[2]) != CONST_INT) + FAIL; + + size = INTVAL (operands[2]); + align = INTVAL (operands[3]); + align = align > 4 ? 4 : (align ? align : 1); + + /* If size/alignment is large, then use the library routines. */ + if (size / align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) + FAIL; + + /* Fall through means we're going to use our block move pattern. */ + operands[0] + = replace_equiv_address (operands[0], + copy_to_mode_reg (SImode, XEXP (operands[0], 0))); + operands[1] + = replace_equiv_address (operands[1], + copy_to_mode_reg (SImode, XEXP (operands[1], 0))); + operands[4] = gen_reg_rtx (SImode); + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = gen_reg_rtx (SImode); + operands[8] = gen_reg_rtx (SImode); +}") + +;; The operand constraints are written like this to support both compile-time +;; and run-time determined byte counts. The expander and output_block_move +;; only support compile-time determined counts at this time. +;; +;; If the count is run-time determined, the register with the byte count +;; is clobbered by the copying code, and therefore it is forced to operand 2. +;; +;; We used to clobber operands 0 and 1. However, a change to regrename.c +;; broke this semantic for pseudo registers. We can't use match_scratch +;; as this requires two registers in the class R1_REGS when the MEMs for +;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are +;; forced to internally copy operands 0 and 1 to operands 7 and 8, +;; respectively. We then split or peephole optimize after reload. 
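To make the heuristic above concrete, here is a small, self-contained C sketch of the decision the movmemsi expander makes (the later movmemdi expander is identical except that the alignment cap is 8 instead of 4); move_ratio stands in for MOVE_RATIO (optimize_insn_for_speed_p ()), and a return of 0 corresponds to the expander FAILing:

    /* Illustrative only: 1 = use the inline block-move pattern,
       0 = FAIL (the copy is then done by the library call or by
       ordinary open-coded moves, as described in the comment above).  */
    static int
    use_inline_block_move (long size, long align, long align_cap,
                           int size_is_constant, int move_ratio)
    {
      if (!size_is_constant)
        return 0;                      /* unknown size: library routine */

      align = align > align_cap ? align_cap : (align ? align : 1);

      if (size / align > 16)
        return 0;                      /* large relative to alignment: library */
      if (size / align < move_ratio)
        return 0;                      /* small: open code the copy */

      return 1;                        /* otherwise, inline block move */
    }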
+(define_insn "movmemsi_prereload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) + (mem:BLK (match_operand:SI 1 "register_operand" "r,r"))) + (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1 + (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_operand:SI 7 "register_operand" "=&r,&r")) ;item tmp3 + (clobber (match_operand:SI 8 "register_operand" "=&r,&r")) ;item tmp4 + (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count + (use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "")) + (clobber (match_operand:SI 3 "register_operand" "")) + (clobber (match_operand:SI 6 "register_operand" "")) + (clobber (match_operand:SI 7 "register_operand" "")) + (clobber (match_operand:SI 8 "register_operand" "")) + (use (match_operand:SI 4 "arith_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))])] + "!TARGET_64BIT && reload_completed && !flag_peephole2 + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), SImode) + && GET_CODE (operands[1]) == MEM + && register_operand (XEXP (operands[1], 0), SImode)" + [(set (match_dup 7) (match_dup 9)) + (set (match_dup 8) (match_dup 10)) + (parallel [(set (match_dup 0) (match_dup 1)) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + " +{ + operands[9] = XEXP (operands[0], 0); + operands[10] = XEXP (operands[1], 0); + operands[0] = replace_equiv_address (operands[0], operands[7]); + operands[1] = replace_equiv_address (operands[1], operands[8]); +}") + +(define_peephole2 + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "")) + (clobber (match_operand:SI 3 "register_operand" "")) + (clobber (match_operand:SI 6 "register_operand" "")) + (clobber (match_operand:SI 7 "register_operand" "")) + (clobber (match_operand:SI 8 "register_operand" "")) + (use (match_operand:SI 4 "arith_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))])] + "!TARGET_64BIT + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), SImode) + && GET_CODE (operands[1]) == MEM + && register_operand (XEXP (operands[1], 0), SImode)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + " +{ + rtx addr = XEXP (operands[0], 0); + if (dead_or_set_p (curr_insn, addr)) + operands[7] = addr; + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[7], addr)); + operands[0] = replace_equiv_address (operands[0], operands[7]); + } + + addr = XEXP (operands[1], 0); + if (dead_or_set_p (curr_insn, addr)) + operands[8] = addr; + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[8], addr)); + operands[1] = replace_equiv_address (operands[1], operands[8]); + } +}") + +(define_insn "movmemsi_postreload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r")) + (mem:BLK (match_operand:SI 1 
"register_operand" "+r,r"))) + (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1 + (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count + (use (match_operand:SI 5 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "!TARGET_64BIT && reload_completed" + "* return output_block_move (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) + +(define_expand "movmemdi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_operand:DI 2 "arith_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))])] + "TARGET_64BIT && optimize > 0" + " +{ + int size, align; + + /* HP provides very fast block move library routine for the PA; + this routine includes: + + 4x4 byte at a time block moves, + 1x4 byte at a time with alignment checked at runtime with + attempts to align the source and destination as needed + 1x1 byte loop + + With that in mind, here's the heuristics to try and guess when + the inlined block move will be better than the library block + move: + + If the size isn't constant, then always use the library routines. + + If the size is large in respect to the known alignment, then use + the library routines. + + If the size is small in respect to the known alignment, then open + code the copy (since that will lead to better scheduling). + + Else use the block move pattern. */ + + /* Undetermined size, use the library routine. */ + if (GET_CODE (operands[2]) != CONST_INT) + FAIL; + + size = INTVAL (operands[2]); + align = INTVAL (operands[3]); + align = align > 8 ? 8 : (align ? align : 1); + + /* If size/alignment is large, then use the library routines. */ + if (size / align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) + FAIL; + + /* Fall through means we're going to use our block move pattern. */ + operands[0] + = replace_equiv_address (operands[0], + copy_to_mode_reg (DImode, XEXP (operands[0], 0))); + operands[1] + = replace_equiv_address (operands[1], + copy_to_mode_reg (DImode, XEXP (operands[1], 0))); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); + operands[6] = gen_reg_rtx (DImode); + operands[7] = gen_reg_rtx (DImode); + operands[8] = gen_reg_rtx (DImode); +}") + +;; The operand constraints are written like this to support both compile-time +;; and run-time determined byte counts. The expander and output_block_move +;; only support compile-time determined counts at this time. +;; +;; If the count is run-time determined, the register with the byte count +;; is clobbered by the copying code, and therefore it is forced to operand 2. +;; +;; We used to clobber operands 0 and 1. However, a change to regrename.c +;; broke this semantic for pseudo registers. We can't use match_scratch +;; as this requires two registers in the class R1_REGS when the MEMs for +;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are +;; forced to internally copy operands 0 and 1 to operands 7 and 8, +;; respectively. We then split or peephole optimize after reload. 
+(define_insn "movmemdi_prereload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) + (mem:BLK (match_operand:DI 1 "register_operand" "r,r"))) + (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1 + (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_operand:DI 7 "register_operand" "=&r,&r")) ;item tmp3 + (clobber (match_operand:DI 8 "register_operand" "=&r,&r")) ;item tmp4 + (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count + (use (match_operand:DI 5 "const_int_operand" "n,n"))] ;alignment + "TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (clobber (match_operand:DI 2 "register_operand" "")) + (clobber (match_operand:DI 3 "register_operand" "")) + (clobber (match_operand:DI 6 "register_operand" "")) + (clobber (match_operand:DI 7 "register_operand" "")) + (clobber (match_operand:DI 8 "register_operand" "")) + (use (match_operand:DI 4 "arith_operand" "")) + (use (match_operand:DI 5 "const_int_operand" ""))])] + "TARGET_64BIT && reload_completed && !flag_peephole2 + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), DImode) + && GET_CODE (operands[1]) == MEM + && register_operand (XEXP (operands[1], 0), DImode)" + [(set (match_dup 7) (match_dup 9)) + (set (match_dup 8) (match_dup 10)) + (parallel [(set (match_dup 0) (match_dup 1)) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + " +{ + operands[9] = XEXP (operands[0], 0); + operands[10] = XEXP (operands[1], 0); + operands[0] = replace_equiv_address (operands[0], operands[7]); + operands[1] = replace_equiv_address (operands[1], operands[8]); +}") + +(define_peephole2 + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (clobber (match_operand:DI 2 "register_operand" "")) + (clobber (match_operand:DI 3 "register_operand" "")) + (clobber (match_operand:DI 6 "register_operand" "")) + (clobber (match_operand:DI 7 "register_operand" "")) + (clobber (match_operand:DI 8 "register_operand" "")) + (use (match_operand:DI 4 "arith_operand" "")) + (use (match_operand:DI 5 "const_int_operand" ""))])] + "TARGET_64BIT + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), DImode) + && GET_CODE (operands[1]) == MEM + && register_operand (XEXP (operands[1], 0), DImode)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + " +{ + rtx addr = XEXP (operands[0], 0); + if (dead_or_set_p (curr_insn, addr)) + operands[7] = addr; + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[7], addr)); + operands[0] = replace_equiv_address (operands[0], operands[7]); + } + + addr = XEXP (operands[1], 0); + if (dead_or_set_p (curr_insn, addr)) + operands[8] = addr; + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[8], addr)); + operands[1] = replace_equiv_address (operands[1], operands[8]); + } +}") + +(define_insn "movmemdi_postreload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "+r,r")) + (mem:BLK (match_operand:DI 1 
"register_operand" "+r,r"))) + (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1 + (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count + (use (match_operand:DI 5 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "TARGET_64BIT && reload_completed" + "* return output_block_move (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) + +(define_expand "setmemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand 2 "const_int_operand" "")) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (use (match_operand:SI 1 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "!TARGET_64BIT && optimize > 0" + " +{ + int size, align; + + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + /* Undetermined size, use the library routine. */ + if (GET_CODE (operands[1]) != CONST_INT) + FAIL; + + size = INTVAL (operands[1]); + align = INTVAL (operands[3]); + align = align > 4 ? 4 : align; + + /* If size/alignment is large, then use the library routines. */ + if (size / align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) + FAIL; + + /* Fall through means we're going to use our block clear pattern. */ + operands[0] + = replace_equiv_address (operands[0], + copy_to_mode_reg (SImode, XEXP (operands[0], 0))); + operands[4] = gen_reg_rtx (SImode); + operands[5] = gen_reg_rtx (SImode); +}") + +(define_insn "clrmemsi_prereload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_operand:SI 4 "register_operand" "=&r,&r")) ;tmp1 + (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:SI 3 "const_int_operand" "n,n"))] ;alignment + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "")) + (clobber (match_operand:SI 4 "register_operand" "")) + (use (match_operand:SI 2 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "!TARGET_64BIT && reload_completed && !flag_peephole2 + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), SImode)" + [(set (match_dup 4) (match_dup 5)) + (parallel [(set (match_dup 0) (const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + " +{ + operands[5] = XEXP (operands[0], 0); + operands[0] = replace_equiv_address (operands[0], operands[4]); +}") + +(define_peephole2 + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "")) + (clobber (match_operand:SI 4 "register_operand" "")) + (use (match_operand:SI 2 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "!TARGET_64BIT + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), SImode)" + [(parallel [(set (match_dup 0) (const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + " +{ + rtx addr = 
XEXP (operands[0], 0); + if (dead_or_set_p (curr_insn, addr)) + operands[4] = addr; + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[4], addr)); + operands[0] = replace_equiv_address (operands[0], operands[4]); + } +}") + +(define_insn "clrmemsi_postreload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r")) + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_dup 0)) + (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:SI 3 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "!TARGET_64BIT && reload_completed" + "* return output_block_clear (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) + +(define_expand "setmemdi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand 2 "const_int_operand" "")) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (use (match_operand:DI 1 "arith_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))])] + "TARGET_64BIT && optimize > 0" + " +{ + int size, align; + + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + /* Undetermined size, use the library routine. */ + if (GET_CODE (operands[1]) != CONST_INT) + FAIL; + + size = INTVAL (operands[1]); + align = INTVAL (operands[3]); + align = align > 8 ? 8 : align; + + /* If size/alignment is large, then use the library routines. */ + if (size / align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) + FAIL; + + /* Fall through means we're going to use our block clear pattern. */ + operands[0] + = replace_equiv_address (operands[0], + copy_to_mode_reg (DImode, XEXP (operands[0], 0))); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}") + +(define_insn "clrmemdi_prereload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_operand:DI 4 "register_operand" "=&r,&r")) ;item tmp1 + (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:DI 3 "const_int_operand" "n,n"))] ;alignment + "TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 4 "register_operand" "")) + (use (match_operand:DI 2 "arith_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))])] + "TARGET_64BIT && reload_completed && !flag_peephole2 + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), DImode)" + [(set (match_dup 4) (match_dup 5)) + (parallel [(set (match_dup 0) (const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + " +{ + operands[5] = XEXP (operands[0], 0); + operands[0] = replace_equiv_address (operands[0], operands[4]); +}") + +(define_peephole2 + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 4 "register_operand" "")) + (use (match_operand:DI 2 "arith_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))])] + "TARGET_64BIT + && GET_CODE (operands[0]) == MEM + && register_operand (XEXP (operands[0], 0), DImode)" + [(parallel [(set (match_dup 0) 
(const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + " +{ + rtx addr = XEXP (operands[0], 0); + if (dead_or_set_p (curr_insn, addr)) + operands[4] = addr; + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[4], addr)); + operands[0] = replace_equiv_address (operands[0], operands[4]); + } +}") + +(define_insn "clrmemdi_postreload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "+r,r")) + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp + (clobber (match_dup 0)) + (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:DI 3 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "TARGET_64BIT && reload_completed" + "* return output_block_clear (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) + +;; Floating point move insns + +;; This pattern forces (set (reg:DF ...) (const_double ...)) +;; to be reloaded by putting the constant into memory when +;; reg is a floating point register. +;; +;; For integer registers we use ldil;ldo to set the appropriate +;; value. +;; +;; This must come before the movdf pattern, and it must be present +;; to handle obscure reloading cases. +(define_insn "" + [(set (match_operand:DF 0 "register_operand" "=?r,f") + (match_operand:DF 1 "" "?F,m"))] + "GET_CODE (operands[1]) == CONST_DOUBLE + && operands[1] != CONST0_RTX (DFmode) + && !TARGET_64BIT + && !TARGET_SOFT_FLOAT" + "* return (which_alternative == 0 ? output_move_double (operands) + : \"fldd%F1 %1,%0\");" + [(set_attr "type" "move,fpload") + (set_attr "length" "16,4")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + " +{ + if (GET_CODE (operands[1]) == CONST_DOUBLE + && operands[1] != CONST0_RTX (DFmode)) + { + /* Reject CONST_DOUBLE loads to all hard registers when + generating 64-bit code and to floating point registers + when generating 32-bit code. */ + if (REG_P (operands[0]) + && HARD_REGISTER_P (operands[0]) + && (TARGET_64BIT || REGNO (operands[0]) >= 32)) + FAIL; + + if (TARGET_64BIT) + operands[1] = force_const_mem (DFmode, operands[1]); + } + + if (emit_move_sequence (operands, DFmode, 0)) + DONE; +}") + +;; Handle DFmode input reloads requiring a general register as a +;; scratch register. +(define_expand "reload_indf" + [(set (match_operand:DF 0 "register_operand" "=Z") + (match_operand:DF 1 "non_hard_reg_operand" "")) + (clobber (match_operand:DF 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, DFmode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle DFmode output reloads requiring a general register as a +;; scratch register. +(define_expand "reload_outdf" + [(set (match_operand:DF 0 "non_hard_reg_operand" "") + (match_operand:DF 1 "register_operand" "Z")) + (clobber (match_operand:DF 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, DFmode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +(define_insn "" + [(set (match_operand:DF 0 "move_dest_operand" + "=f,*r,Q,?o,?Q,f,*r,*r,?*r,?f") + (match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand" + "fG,*rG,f,*r,*r,RQ,o,RQ,f,*r"))] + "(register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode)) + && !(GET_CODE (operands[1]) == CONST_DOUBLE + && GET_CODE (operands[0]) == MEM) + && !TARGET_64BIT + && !TARGET_SOFT_FLOAT" + "* +{ + if ((FP_REG_P (operands[0]) || FP_REG_P (operands[1]) + || operands[1] == CONST0_RTX (DFmode)) + && !(REG_P (operands[0]) && REG_P (operands[1]) + && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))) + return output_fp_move_double (operands); + return output_move_double (operands); +}" + [(set_attr "type" "fpalu,move,fpstore,store,store,fpload,load,load,fpstore_load,store_fpload") + (set_attr "length" "4,8,4,8,16,4,8,16,12,12")]) + +(define_insn "" + [(set (match_operand:DF 0 "indexed_memory_operand" "=R") + (match_operand:DF 1 "reg_or_0_operand" "f"))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && reload_completed" + "fstd%F0 %1,%0" + [(set_attr "type" "fpstore") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4")]) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "") + (const_int 8)) + (match_operand:SI 2 "register_operand" ""))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:SI (mult:SI (match_dup 1) (const_int 8)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 8)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 2 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (const_int 8)))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:SI (mult:SI (match_dup 1) (const_int 8)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 8)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 8)) + (match_operand:DI 2 "register_operand" ""))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 2 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 8)))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2))) + 
(match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_INDEX_P (operands[1]) + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:SI (match_dup 1) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_BASE_P (operands[1]) + && REG_OK_FOR_INDEX_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:SI (match_dup 2) (match_dup 1))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 1)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_INDEX_P (operands[1]) + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:DI (match_dup 1) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:DF (match_dup 0)) + (match_operand:DF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_BASE_P (operands[1]) + && REG_OK_FOR_INDEX_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DF (plus:DI (match_dup 2) (match_dup 1))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))] + "") + +(define_insn "" + [(set (match_operand:DF 0 "move_dest_operand" + "=r,?o,?Q,r,r") + (match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand" + "rG,r,r,o,RQ"))] + "(register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode)) + && !TARGET_64BIT + && TARGET_SOFT_FLOAT" + "* +{ + return output_move_double (operands); +}" + [(set_attr "type" "move,store,store,load,load") + (set_attr "length" "8,8,16,8,16")]) + +(define_insn "" + [(set (match_operand:DF 0 "move_dest_operand" + "=!*r,*r,*r,*r,*r,Q,f,f,T") + (match_operand:DF 1 "move_src_operand" + "!*r,J,N,K,RQ,*rG,fG,RT,f"))] + "(register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode)) + && !TARGET_SOFT_FLOAT && TARGET_64BIT" + "@ + copy %1,%0 + ldi %1,%0 + ldil L'%1,%0 + depdi,z %z1,%0 + ldd%M1 %1,%0 + std%M0 %r1,%0 + fcpy,dbl %f1,%0 + fldd%F1 %1,%0 + fstd%F0 %1,%0" + [(set_attr "type" "move,move,move,shift,load,store,fpalu,fpload,fpstore") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,4,4,4")]) + + +(define_expand "movdi" + [(set 
(match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " +{ + /* Except for zero, we don't support loading a CONST_INT directly + to a hard floating-point register since a scratch register is + needed for the operation. While the operation could be handled + before register allocation, the simplest solution is to fail. */ + if (TARGET_64BIT + && GET_CODE (operands[1]) == CONST_INT + && operands[1] != CONST0_RTX (DImode) + && REG_P (operands[0]) + && HARD_REGISTER_P (operands[0]) + && REGNO (operands[0]) >= 32) + FAIL; + + if (emit_move_sequence (operands, DImode, 0)) + DONE; +}") + +;; Handle DImode input reloads requiring %r1 as a scratch register. +(define_expand "reload_indi_r1" + [(set (match_operand:DI 0 "register_operand" "=Z") + (match_operand:DI 1 "non_hard_reg_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "=&a"))] + "" + " +{ + if (emit_move_sequence (operands, DImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle DImode input reloads requiring a general register as a +;; scratch register. +(define_expand "reload_indi" + [(set (match_operand:DI 0 "register_operand" "=Z") + (match_operand:DI 1 "non_hard_reg_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, DImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle DImode output reloads requiring a general register as a +;; scratch register. +(define_expand "reload_outdi" + [(set (match_operand:DI 0 "non_hard_reg_operand" "") + (match_operand:DI 1 "register_operand" "Z")) + (clobber (match_operand:SI 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, DImode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (match_operand 1 "" "")))] + "!TARGET_64BIT" + "* +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + switch (GET_CODE (op1)) + { + case CONST_INT: +#if HOST_BITS_PER_WIDE_INT <= 32 + operands[0] = operand_subword (op0, 1, 0, DImode); + output_asm_insn (\"ldil L'%1,%0\", operands); + + operands[0] = operand_subword (op0, 0, 0, DImode); + if (INTVAL (op1) < 0) + output_asm_insn (\"ldi -1,%0\", operands); + else + output_asm_insn (\"ldi 0,%0\", operands); +#else + operands[0] = operand_subword (op0, 1, 0, DImode); + operands[1] = GEN_INT (INTVAL (op1) & 0xffffffff); + output_asm_insn (\"ldil L'%1,%0\", operands); + + operands[0] = operand_subword (op0, 0, 0, DImode); + operands[1] = GEN_INT (INTVAL (op1) >> 32); + output_asm_insn (singlemove_string (operands), operands); +#endif + break; + + case CONST_DOUBLE: + operands[0] = operand_subword (op0, 1, 0, DImode); + operands[1] = GEN_INT (CONST_DOUBLE_LOW (op1)); + output_asm_insn (\"ldil L'%1,%0\", operands); + + operands[0] = operand_subword (op0, 0, 0, DImode); + operands[1] = GEN_INT (CONST_DOUBLE_HIGH (op1)); + output_asm_insn (singlemove_string (operands), operands); + break; + + default: + gcc_unreachable (); + } + return \"\"; +}" + [(set_attr "type" "move") + (set_attr "length" "12")]) + +(define_insn "" + [(set (match_operand:DI 0 "move_dest_operand" + "=r,o,Q,r,r,r,*f,*f,T,?r,?*f") + (match_operand:DI 1 "general_operand" + "rM,r,r,o*R,Q,i,*fM,RT,*f,*f,r"))] + "(register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode)) + && !TARGET_64BIT + && !TARGET_SOFT_FLOAT" + "* +{ + if ((FP_REG_P (operands[0]) || FP_REG_P (operands[1]) + || operands[1] == CONST0_RTX (DFmode)) + && !(REG_P (operands[0]) && REG_P (operands[1]) + && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))) + return output_fp_move_double (operands); + return output_move_double (operands); +}" + [(set_attr "type" + "move,store,store,load,load,multi,fpalu,fpload,fpstore,fpstore_load,store_fpload") + (set_attr "length" "8,8,16,8,16,16,4,4,4,12,12")]) + +(define_insn "" + [(set (match_operand:DI 0 "move_dest_operand" + "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T") + (match_operand:DI 1 "move_src_operand" + "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f"))] + "(register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode)) + && !TARGET_SOFT_FLOAT && TARGET_64BIT" + "@ + ldd RT'%A1,%0 + copy %1,%0 + ldi %1,%0 + ldil L'%1,%0 + depdi,z %z1,%0 + ldd%M1 %1,%0 + std%M0 %r1,%0 + mtsar %r1 + {mfctl|mfctl,w} %%sar,%0 + fcpy,dbl %f1,%0 + fldd%F1 %1,%0 + fstd%F0 %1,%0" + [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4")]) + +(define_insn "" + [(set (match_operand:DI 0 "indexed_memory_operand" "=R") + (match_operand:DI 1 "register_operand" "f"))] + "!TARGET_SOFT_FLOAT + && TARGET_64BIT + && !TARGET_DISABLE_INDEXING + && reload_completed" + "fstd%F0 %1,%0" + [(set_attr "type" "fpstore") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4")]) + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 8)) + (match_operand:DI 2 "register_operand" ""))) + (set (mem:DI (match_dup 0)) + (match_operand:DI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING 
+ && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DI (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 2 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 8)))) + (set (mem:DI (match_dup 0)) + (match_operand:DI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DI (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:DI (match_dup 0)) + (match_operand:DI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_INDEX_P (operands[1]) + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DI (plus:DI (match_dup 1) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:DI (match_dup 0)) + (match_operand:DI 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_BASE_P (operands[1]) + && REG_OK_FOR_INDEX_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:DI (plus:DI (match_dup 2) (match_dup 1))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))] + "") + +(define_insn "" + [(set (match_operand:DI 0 "move_dest_operand" + "=r,o,Q,r,r,r") + (match_operand:DI 1 "general_operand" + "rM,r,r,o,Q,i"))] + "(register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode)) + && !TARGET_64BIT + && TARGET_SOFT_FLOAT" + "* +{ + return output_move_double (operands); +}" + [(set_attr "type" "move,store,store,load,load,multi") + (set_attr "length" "8,8,16,8,16,16")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,&r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 2 "immediate_operand" "i,i")))] + "!TARGET_64BIT" + "* +{ + /* Don't output a 64-bit constant, since we can't trust the assembler to + handle it correctly. */ + if (GET_CODE (operands[2]) == CONST_DOUBLE) + operands[2] = GEN_INT (CONST_DOUBLE_LOW (operands[2])); + else if (HOST_BITS_PER_WIDE_INT > 32 + && GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffffffff); + if (which_alternative == 1) + output_asm_insn (\"copy %1,%0\", operands); + return \"ldo R'%G2(%R1),%R0\"; +}" + [(set_attr "type" "move,move") + (set_attr "length" "4,8")]) + +;; This pattern forces (set (reg:SF ...) (const_double ...)) +;; to be reloaded by putting the constant into memory when +;; reg is a floating point register. +;; +;; For integer registers we use ldil;ldo to set the appropriate +;; value. 
+;; +;; This must come before the movsf pattern, and it must be present +;; to handle obscure reloading cases. +(define_insn "" + [(set (match_operand:SF 0 "register_operand" "=?r,f") + (match_operand:SF 1 "" "?F,m"))] + "GET_CODE (operands[1]) == CONST_DOUBLE + && operands[1] != CONST0_RTX (SFmode) + && ! TARGET_SOFT_FLOAT" + "* return (which_alternative == 0 ? singlemove_string (operands) + : \" fldw%F1 %1,%0\");" + [(set_attr "type" "move,fpload") + (set_attr "length" "8,4")]) + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + " +{ + /* Reject CONST_DOUBLE loads to floating point registers. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE + && operands[1] != CONST0_RTX (SFmode) + && REG_P (operands[0]) + && HARD_REGISTER_P (operands[0]) + && REGNO (operands[0]) >= 32) + FAIL; + + if (emit_move_sequence (operands, SFmode, 0)) + DONE; +}") + +;; Handle SFmode input reloads requiring a general register as a +;; scratch register. +(define_expand "reload_insf" + [(set (match_operand:SF 0 "register_operand" "=Z") + (match_operand:SF 1 "non_hard_reg_operand" "")) + (clobber (match_operand:SF 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, SFmode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +;; Handle SFmode output reloads requiring a general register as a +;; scratch register. +(define_expand "reload_outsf" + [(set (match_operand:SF 0 "non_hard_reg_operand" "") + (match_operand:SF 1 "register_operand" "Z")) + (clobber (match_operand:SF 2 "register_operand" "=&r"))] + "" + " +{ + if (emit_move_sequence (operands, SFmode, operands[2])) + DONE; + + /* We don't want the clobber emitted, so handle this ourselves. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; +}") + +(define_insn "" + [(set (match_operand:SF 0 "move_dest_operand" + "=f,!*r,f,*r,Q,Q,?*r,?f") + (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand" + "fG,!*rG,RQ,RQ,f,*rG,f,*r"))] + "(register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)) + && !TARGET_SOFT_FLOAT + && !TARGET_64BIT" + "@ + fcpy,sgl %f1,%0 + copy %r1,%0 + fldw%F1 %1,%0 + ldw%M1 %1,%0 + fstw%F0 %1,%0 + stw%M0 %r1,%0 + {fstws|fstw} %1,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0 + {stws|stw} %1,-16(%%sp)\n\t{fldws|fldw} -16(%%sp),%0" + [(set_attr "type" "fpalu,move,fpload,load,fpstore,store,fpstore_load,store_fpload") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4,8,8")]) + +(define_insn "" + [(set (match_operand:SF 0 "move_dest_operand" + "=f,!*r,f,*r,Q,Q") + (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand" + "fG,!*rG,RQ,RQ,f,*rG"))] + "(register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)) + && !TARGET_SOFT_FLOAT + && TARGET_64BIT" + "@ + fcpy,sgl %f1,%0 + copy %r1,%0 + fldw%F1 %1,%0 + ldw%M1 %1,%0 + fstw%F0 %1,%0 + stw%M0 %r1,%0" + [(set_attr "type" "fpalu,move,fpload,load,fpstore,store") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4,4,4,4")]) + +(define_insn "" + [(set (match_operand:SF 0 "indexed_memory_operand" "=R") + (match_operand:SF 1 "register_operand" "f"))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && reload_completed" + "fstw%F0 %1,%0" + [(set_attr "type" "fpstore") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4")]) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "") + (const_int 4)) + (match_operand:SI 2 "register_operand" ""))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 2 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (const_int 4)))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 4)) + (match_operand:DI 2 "register_operand" ""))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 2 "register_operand" 
"") + (mult:DI (match_operand:DI 1 "register_operand" "") + (const_int 4)))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4)) + (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_INDEX_P (operands[1]) + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:SI (match_dup 1) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_BASE_P (operands[1]) + && REG_OK_FOR_INDEX_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:SI (match_dup 2) (match_dup 1))) + (match_dup 3)) + (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 1)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_INDEX_P (operands[1]) + && REG_OK_FOR_BASE_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:DI (match_dup 1) (match_dup 2))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_SOFT_FLOAT + && !TARGET_DISABLE_INDEXING + && TARGET_64BIT + && TARGET_NO_SPACE_REGS + && REG_OK_FOR_BASE_P (operands[1]) + && REG_OK_FOR_INDEX_P (operands[2]) + && FP_REGNO_P (REGNO (operands[3]))" + [(set (mem:SF (plus:DI (match_dup 2) (match_dup 1))) + (match_dup 3)) + (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))] + "") + +(define_insn "" + [(set (match_operand:SF 0 "move_dest_operand" + "=r,r,Q") + (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand" + "rG,RQ,rG"))] + "(register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)) + && TARGET_SOFT_FLOAT" + "@ + copy %r1,%0 + ldw%M1 %1,%0 + stw%M0 %r1,%0" + [(set_attr "type" "move,load,store") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4,4")]) + + + +;;- zero extension instructions +;; We have define_expand for zero extension patterns to make sure the +;; operands get loaded into registers. The define_insns accept +;; memory operands. 
This gives us better overall code than just +;; having a pattern that does or does not accept memory operands. + +(define_expand "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI + (match_operand:QI 1 "register_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (zero_extend:HI + (match_operand:QI 1 "move_src_operand" "r,RQ")))] + "GET_CODE (operands[1]) != CONST_INT" + "@ + {extru|extrw,u} %1,31,8,%0 + ldb%M1 %1,%0" + [(set_attr "type" "shift,load") + (set_attr "length" "4,4")]) + +(define_expand "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI + (match_operand:QI 1 "register_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI + (match_operand:QI 1 "move_src_operand" "r,RQ")))] + "GET_CODE (operands[1]) != CONST_INT" + "@ + {extru|extrw,u} %1,31,8,%0 + ldb%M1 %1,%0" + [(set_attr "type" "shift,load") + (set_attr "length" "4,4")]) + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI + (match_operand:HI 1 "register_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI + (match_operand:HI 1 "move_src_operand" "r,RQ")))] + "GET_CODE (operands[1]) != CONST_INT" + "@ + {extru|extrw,u} %1,31,16,%0 + ldh%M1 %1,%0" + [(set_attr "type" "shift,load") + (set_attr "length" "4,4")]) + +(define_expand "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (match_operand:QI 1 "register_operand" "")))] + "TARGET_64BIT" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (match_operand:QI 1 "move_src_operand" "r,RQ")))] + "TARGET_64BIT && GET_CODE (operands[1]) != CONST_INT" + "@ + extrd,u %1,63,8,%0 + ldb%M1 %1,%0" + [(set_attr "type" "shift,load") + (set_attr "length" "4,4")]) + +(define_expand "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (match_operand:HI 1 "register_operand" "")))] + "TARGET_64BIT" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (match_operand:HI 1 "move_src_operand" "r,RQ")))] + "TARGET_64BIT && GET_CODE (operands[1]) != CONST_INT" + "@ + extrd,u %1,63,16,%0 + ldh%M1 %1,%0" + [(set_attr "type" "shift,load") + (set_attr "length" "4,4")]) + +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (match_operand:SI 1 "register_operand" "")))] + "TARGET_64BIT" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (match_operand:SI 1 "move_src_operand" "r,RQ")))] + "TARGET_64BIT && GET_CODE (operands[1]) != CONST_INT" + "@ + extrd,u %1,63,32,%0 + ldw%M1 %1,%0" + [(set_attr "type" "shift,load") + (set_attr "length" "4,4")]) + +;;- sign extension instructions + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "" + "{extrs|extrw,s} %1,31,16,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "register_operand" "r")))] + "" + "{extrs|extrw,s} %1,31,8,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + 
(sign_extend:SI (match_operand:QI 1 "register_operand" "r")))] + "" + "{extrs|extrw,s} %1,31,8,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_64BIT" + "extrd,s %1,63,8,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_64BIT" + "extrd,s %1,63,16,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_64BIT" + "extrd,s %1,63,32,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + + +;; Conversions between float and double. + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF + (match_operand:SF 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "{fcnvff|fcnv},sgl,dbl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "{fcnvff|fcnv},dbl,sgl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +;; Conversion between fixed point and floating point. +;; Note that among the fix-to-float insns +;; the ones that start with SImode come first. +;; That is so that an operand that is a CONST_INT +;; (and therefore lacks a specific machine mode). +;; will be recognized as SImode (which is always valid) +;; rather than as QImode or HImode. + +;; This pattern forces (set (reg:SF ...) (float:SF (const_int ...))) +;; to be reloaded by putting the constant into memory. +;; It must come before the more general floatsisf2 pattern. +(define_insn "" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "const_int_operand" "m")))] + "! TARGET_SOFT_FLOAT" + "fldw%F1 %1,%0\;{fcnvxf,sgl,sgl|fcnv,w,sgl} %0,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "8")]) + +(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "{fcnvxf,sgl,sgl|fcnv,w,sgl} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +;; This pattern forces (set (reg:DF ...) (float:DF (const_int ...))) +;; to be reloaded by putting the constant into memory. +;; It must come before the more general floatsidf2 pattern. +(define_insn "" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "const_int_operand" "m")))] + "! TARGET_SOFT_FLOAT" + "fldw%F1 %1,%0\;{fcnvxf,sgl,dbl|fcnv,w,dbl} %0,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "8")]) + +(define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "{fcnvxf,sgl,dbl|fcnv,w,dbl} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_expand "floatunssisf2" + [(set (subreg:SI (match_dup 2) 4) + (match_operand:SI 1 "register_operand" "")) + (set (subreg:SI (match_dup 2) 0) + (const_int 0)) + (set (match_operand:SF 0 "register_operand" "") + (float:SF (match_dup 2)))] + "TARGET_PA_11 && ! 
TARGET_SOFT_FLOAT" + " +{ + if (TARGET_PA_20) + { + emit_insn (gen_floatunssisf2_pa20 (operands[0], operands[1])); + DONE; + } + operands[2] = gen_reg_rtx (DImode); +}") + +(define_expand "floatunssidf2" + [(set (subreg:SI (match_dup 2) 4) + (match_operand:SI 1 "register_operand" "")) + (set (subreg:SI (match_dup 2) 0) + (const_int 0)) + (set (match_operand:DF 0 "register_operand" "") + (float:DF (match_dup 2)))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT" + " +{ + if (TARGET_PA_20) + { + emit_insn (gen_floatunssidf2_pa20 (operands[0], operands[1])); + DONE; + } + operands[2] = gen_reg_rtx (DImode); +}") + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "register_operand" "f")))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT" + "{fcnvxf,dbl,sgl|fcnv,dw,sgl} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "register_operand" "f")))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT" + "{fcnvxf,dbl,dbl|fcnv,dw,dbl} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +;; Convert a float to an actual integer. +;; Truncation is performed as part of the conversion. + +(define_insn "fix_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))] + "! TARGET_SOFT_FLOAT" + "{fcnvfxt,sgl,sgl|fcnv,t,sgl,w} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "fix_truncdfsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "f"))))] + "! TARGET_SOFT_FLOAT" + "{fcnvfxt,dbl,sgl|fcnv,t,dbl,w} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "fix_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT" + "{fcnvfxt,sgl,dbl|fcnv,t,sgl,dw} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "fix_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "f"))))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT" + "{fcnvfxt,dbl,dbl|fcnv,t,dbl,dw} %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "floatunssidf2_pa20" + [(set (match_operand:DF 0 "register_operand" "=f") + (unsigned_float:DF (match_operand:SI 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,uw,dbl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "floatunssisf2_pa20" + [(set (match_operand:SF 0 "register_operand" "=f") + (unsigned_float:SF (match_operand:SI 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,uw,sgl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "floatunsdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unsigned_float:SF (match_operand:DI 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,udw,sgl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "floatunsdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unsigned_float:DF (match_operand:DI 1 "register_operand" "f")))] + "! 
TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,udw,dbl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (unsigned_fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,t,sgl,uw %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (unsigned_fix:SI (fix:DF (match_operand:DF 1 "register_operand" "f"))))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,t,dbl,uw %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (unsigned_fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,t,sgl,udw %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (unsigned_fix:DI (fix:DF (match_operand:DF 1 "register_operand" "f"))))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + "fcnv,t,dbl,udw %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +;;- arithmetic instructions + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "adddi3_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "arith11_operand" "rI")))] + "!TARGET_64BIT" + "* +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + if (INTVAL (operands[2]) >= 0) + return \"addi %2,%R1,%R0\;{addc|add,c} %1,%%r0,%0\"; + else + return \"addi %2,%R1,%R0\;{subb|sub,b} %1,%%r0,%0\"; + } + else + return \"add %R2,%R1,%R0\;{addc|add,c} %2,%1,%0\"; +}" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r") + (match_operand:DI 2 "arith_operand" "r,J")))] + "TARGET_64BIT" + "@ + add,l %1,%2,%0 + ldo %2(%1),%0" + [(set_attr "type" "binary,binary") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (not:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r")))] + "TARGET_64BIT" + "uaddcm %2,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (not:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r")))] + "" + "uaddcm %2,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_expand "addvdi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "arith11_operand" ""))) + (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (plus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))])] + "" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rM,rM") + (match_operand:DI 2 "arith11_operand" "r,I"))) + (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI 
(plus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "TARGET_64BIT" + "@ + add,tsv,* %2,%1,%0 + addi,tsv,* %2,%1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "4,4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rM") + (match_operand:DI 2 "arith11_operand" "rI"))) + (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (plus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "!TARGET_64BIT" + "* +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + if (INTVAL (operands[2]) >= 0) + return \"addi %2,%R1,%R0\;{addco|add,c,tsv} %1,%%r0,%0\"; + else + return \"addi %2,%R1,%R0\;{subbo|sub,b,tsv} %1,%%r0,%0\"; + } + else + return \"add %R2,%R1,%R0\;{addco|add,c,tsv} %2,%1,%0\"; +}" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +;; define_splits to optimize cases of adding a constant integer +;; to a register when the constant does not fit in 14 bits. */ +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (match_operand:SI 4 "register_operand" ""))] + "! cint_ok_for_move (INTVAL (operands[2])) + && VAL_14_BITS_P (INTVAL (operands[2]) >> 1)" + [(set (match_dup 4) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 4) (match_dup 3)))] + " +{ + int val = INTVAL (operands[2]); + int low = (val < 0) ? -0x2000 : 0x1fff; + int rest = val - low; + + operands[2] = GEN_INT (rest); + operands[3] = GEN_INT (low); +}") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (match_operand:SI 4 "register_operand" ""))] + "! cint_ok_for_move (INTVAL (operands[2]))" + [(set (match_dup 4) (match_dup 2)) + (set (match_dup 0) (plus:SI (mult:SI (match_dup 4) (match_dup 3)) + (match_dup 1)))] + " +{ + HOST_WIDE_INT intval = INTVAL (operands[2]); + + /* Try dividing the constant by 2, then 4, and finally 8 to see + if we can get a constant which can be loaded into a register + in a single instruction (cint_ok_for_move). + + If that fails, try to negate the constant and subtract it + from our input operand. 
*/ + if (intval % 2 == 0 && cint_ok_for_move (intval / 2)) + { + operands[2] = GEN_INT (intval / 2); + operands[3] = const2_rtx; + } + else if (intval % 4 == 0 && cint_ok_for_move (intval / 4)) + { + operands[2] = GEN_INT (intval / 4); + operands[3] = GEN_INT (4); + } + else if (intval % 8 == 0 && cint_ok_for_move (intval / 8)) + { + operands[2] = GEN_INT (intval / 8); + operands[3] = GEN_INT (8); + } + else if (cint_ok_for_move (-intval)) + { + emit_insn (gen_rtx_SET (VOIDmode, operands[4], GEN_INT (-intval))); + emit_insn (gen_subsi3 (operands[0], operands[1], operands[4])); + DONE; + } + else + FAIL; +}") + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "arith_operand" "r,J")))] + "" + "@ + {addl|add,l} %1,%2,%0 + ldo %2(%1),%0" + [(set_attr "type" "binary,binary") + (set_attr "pa_combine_type" "addmove") + (set_attr "length" "4,4")]) + +(define_insn "addvsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rM,rM") + (match_operand:SI 2 "arith11_operand" "r,I"))) + (trap_if (ne (plus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2))) + (sign_extend:DI (plus:SI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "@ + {addo|add,tsv} %2,%1,%0 + {addio|addi,tsv} %2,%1,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "4,4")]) + +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (match_operand:DI 1 "arith11_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r,!q") + (minus:DI (match_operand:DI 1 "arith11_operand" "r,I,!U") + (match_operand:DI 2 "reg_or_0_operand" "rM,rM,!rM")))] + "TARGET_64BIT" + "@ + sub %1,%2,%0 + subi %1,%2,%0 + mtsarcm %2" + [(set_attr "type" "binary,binary,move") + (set_attr "length" "4,4,4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,&r") + (minus:DI (match_operand:DI 1 "arith11_operand" "r,I") + (match_operand:DI 2 "reg_or_0_operand" "rM,rM")))] + "!TARGET_64BIT" + "* +{ + if (GET_CODE (operands[1]) == CONST_INT) + { + if (INTVAL (operands[1]) >= 0) + return \"subi %1,%R2,%R0\;{subb|sub,b} %%r0,%2,%0\"; + else + return \"ldi -1,%0\;subi %1,%R2,%R0\;{subb|sub,b} %0,%2,%0\"; + } + else + return \"sub %R1,%R2,%R0\;{subb|sub,b} %1,%2,%0\"; +}" + [(set_attr "type" "binary") + (set (attr "length") + (if_then_else (eq_attr "alternative" "0") + (const_int 8) + (if_then_else (ge (symbol_ref "INTVAL (operands[1])") + (const_int 0)) + (const_int 8) + (const_int 12))))]) + +(define_expand "subvdi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (match_operand:DI 1 "arith11_operand" "") + (match_operand:DI 2 "reg_or_0_operand" ""))) + (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (minus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))])] + "" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (minus:DI (match_operand:DI 1 "arith11_operand" "r,I") + (match_operand:DI 2 "reg_or_0_operand" "rM,rM"))) + (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (minus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "TARGET_64BIT" + "@ + {subo|sub,tsv} %1,%2,%0 + {subio|subi,tsv} %1,%2,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "4,4")]) 
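+
+;; For illustration (a rough sketch of the output template in the
+;; !TARGET_64BIT pattern that follows, not an additional pattern): on
+;; 32-bit targets a trapping DImode subtract is split across the two
+;; word halves, producing approximately
+;;
+;;	sub       %R1,%R2,%R0	; low-order words, sets the borrow
+;;	sub,b,tsv %1,%2,%0	; high-order words minus borrow, trap on overflow
+;;
+;; where the exact mnemonics ({subbo|sub,b,tsv}) depend on the
+;; assembler dialect selected by the {} output syntax.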
+ +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,&r") + (minus:DI (match_operand:DI 1 "arith11_operand" "r,I") + (match_operand:DI 2 "reg_or_0_operand" "rM,rM"))) + (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (minus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "!TARGET_64BIT" + "* +{ + if (GET_CODE (operands[1]) == CONST_INT) + { + if (INTVAL (operands[1]) >= 0) + return \"subi %1,%R2,%R0\;{subbo|sub,b,tsv} %%r0,%2,%0\"; + else + return \"ldi -1,%0\;subi %1,%R2,%R0\;{subbo|sub,b,tsv} %0,%2,%0\"; + } + else + return \"sub %R1,%R2,%R0\;{subbo|sub,b,tsv} %1,%2,%0\"; +}" + [(set_attr "type" "binary,binary") + (set (attr "length") + (if_then_else (eq_attr "alternative" "0") + (const_int 8) + (if_then_else (ge (symbol_ref "INTVAL (operands[1])") + (const_int 0)) + (const_int 8) + (const_int 12))))]) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (match_operand:SI 1 "arith11_operand" "") + (match_operand:SI 2 "register_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "arith11_operand" "r,I") + (match_operand:SI 2 "register_operand" "r,r")))] + "!TARGET_PA_20" + "@ + sub %1,%2,%0 + subi %1,%2,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "4,4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r,!q") + (minus:SI (match_operand:SI 1 "arith11_operand" "r,I,!S") + (match_operand:SI 2 "register_operand" "r,r,!r")))] + "TARGET_PA_20" + "@ + sub %1,%2,%0 + subi %1,%2,%0 + mtsarcm %2" + [(set_attr "type" "binary,binary,move") + (set_attr "length" "4,4,4")]) + +(define_insn "subvsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "arith11_operand" "rM,I") + (match_operand:SI 2 "reg_or_0_operand" "rM,rM"))) + (trap_if (ne (minus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2))) + (sign_extend:DI (minus:SI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "@ + {subo|sub,tsv} %1,%2,%0 + {subio|subi,tsv} %1,%2,%0" + [(set_attr "type" "binary,binary") + (set_attr "length" "4,4")]) + +;; Clobbering a "register_operand" instead of a match_scratch +;; in operand3 of millicode calls avoids spilling %r1 and +;; produces better code. + +;; The mulsi3 insns set up registers for the millicode call. +(define_expand "mulsi3" + [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" "")) + (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" "")) + (parallel [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_dup 3)) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (match_dup 4))]) + (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))] + "" + " +{ + operands[4] = gen_rtx_REG (SImode, TARGET_64BIT ? 
2 : 31); + if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT) + { + rtx scratch = gen_reg_rtx (DImode); + operands[1] = force_reg (SImode, operands[1]); + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_umulsidi3 (scratch, operands[1], operands[2])); + emit_insn (gen_movsi (operands[0], + gen_rtx_SUBREG (SImode, scratch, + GET_MODE_SIZE (SImode)))); + DONE; + } + operands[3] = gen_reg_rtx (SImode); +}") + +(define_insn "umulsidi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "=f") + (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f")) + (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "f"))))] + "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT" + "xmpyu %1,%2,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "nonimmediate_operand" "=f") + (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f")) + (match_operand:DI 2 "uint32_operand" "f")))] + "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT && !TARGET_64BIT" + "xmpyu %1,%R2,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "nonimmediate_operand" "=f") + (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f")) + (match_operand:DI 2 "uint32_operand" "f")))] + "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT && TARGET_64BIT" + "xmpyu %1,%2R,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "4")]) + +(define_insn "" + [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_operand:SI 0 "register_operand" "=a")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 31))] + "!TARGET_64BIT" + "* return output_mul_insn (0, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_insn "" + [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_operand:SI 0 "register_operand" "=a")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 2))] + "TARGET_64BIT" + "* return output_mul_insn (0, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_expand "muldi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")))] + "TARGET_64BIT && ! TARGET_DISABLE_FPREGS && ! 
TARGET_SOFT_FLOAT" + " +{ + rtx low_product = gen_reg_rtx (DImode); + rtx cross_product1 = gen_reg_rtx (DImode); + rtx cross_product2 = gen_reg_rtx (DImode); + rtx cross_scratch = gen_reg_rtx (DImode); + rtx cross_product = gen_reg_rtx (DImode); + rtx op1l, op1r, op2l, op2r; + rtx op1shifted, op2shifted; + + op1shifted = gen_reg_rtx (DImode); + op2shifted = gen_reg_rtx (DImode); + op1l = gen_reg_rtx (SImode); + op1r = gen_reg_rtx (SImode); + op2l = gen_reg_rtx (SImode); + op2r = gen_reg_rtx (SImode); + + emit_move_insn (op1shifted, gen_rtx_LSHIFTRT (DImode, operands[1], + GEN_INT (32))); + emit_move_insn (op2shifted, gen_rtx_LSHIFTRT (DImode, operands[2], + GEN_INT (32))); + op1r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[1], 4)); + op2r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[2], 4)); + op1l = force_reg (SImode, gen_rtx_SUBREG (SImode, op1shifted, 4)); + op2l = force_reg (SImode, gen_rtx_SUBREG (SImode, op2shifted, 4)); + + /* Emit multiplies for the cross products. */ + emit_insn (gen_umulsidi3 (cross_product1, op2r, op1l)); + emit_insn (gen_umulsidi3 (cross_product2, op2l, op1r)); + + /* Emit a multiply for the low sub-word. */ + emit_insn (gen_umulsidi3 (low_product, copy_rtx (op2r), copy_rtx (op1r))); + + /* Sum the cross products and shift them into proper position. */ + emit_insn (gen_adddi3 (cross_scratch, cross_product1, cross_product2)); + emit_insn (gen_ashldi3 (cross_product, cross_scratch, GEN_INT (32))); + + /* Add the cross product to the low product and store the result + into the output operand . */ + emit_insn (gen_adddi3 (operands[0], cross_product, low_product)); + DONE; +}") + +;;; Division and mod. +(define_expand "divsi3" + [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" "")) + (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" "")) + (parallel [(set (reg:SI 29) (div:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (match_dup 5))]) + (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))] + "" + " +{ + operands[3] = gen_reg_rtx (SImode); + if (TARGET_64BIT) + { + operands[5] = gen_rtx_REG (SImode, 2); + operands[4] = operands[5]; + } + else + { + operands[5] = gen_rtx_REG (SImode, 31); + operands[4] = gen_reg_rtx (SImode); + } + if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 0)) + DONE; +}") + +(define_insn "" + [(set (reg:SI 29) + (div:SI (reg:SI 26) (match_operand:SI 0 "div_operand" ""))) + (clobber (match_operand:SI 1 "register_operand" "=a")) + (clobber (match_operand:SI 2 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 31))] + "!TARGET_64BIT" + "* + return output_div_insn (operands, 0, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_insn "" + [(set (reg:SI 29) + (div:SI (reg:SI 26) (match_operand:SI 0 "div_operand" ""))) + (clobber (match_operand:SI 1 "register_operand" "=a")) + (clobber (match_operand:SI 2 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 2))] + "TARGET_64BIT" + "* + return output_div_insn (operands, 0, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_expand "udivsi3" + [(set (reg:SI 26) (match_operand:SI 1 
"move_src_operand" "")) + (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" "")) + (parallel [(set (reg:SI 29) (udiv:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (match_dup 5))]) + (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))] + "" + " +{ + operands[3] = gen_reg_rtx (SImode); + + if (TARGET_64BIT) + { + operands[5] = gen_rtx_REG (SImode, 2); + operands[4] = operands[5]; + } + else + { + operands[5] = gen_rtx_REG (SImode, 31); + operands[4] = gen_reg_rtx (SImode); + } + if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 1)) + DONE; +}") + +(define_insn "" + [(set (reg:SI 29) + (udiv:SI (reg:SI 26) (match_operand:SI 0 "div_operand" ""))) + (clobber (match_operand:SI 1 "register_operand" "=a")) + (clobber (match_operand:SI 2 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 31))] + "!TARGET_64BIT" + "* + return output_div_insn (operands, 1, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_insn "" + [(set (reg:SI 29) + (udiv:SI (reg:SI 26) (match_operand:SI 0 "div_operand" ""))) + (clobber (match_operand:SI 1 "register_operand" "=a")) + (clobber (match_operand:SI 2 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 2))] + "TARGET_64BIT" + "* + return output_div_insn (operands, 1, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_expand "modsi3" + [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" "")) + (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" "")) + (parallel [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (match_dup 5))]) + (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))] + "" + " +{ + if (TARGET_64BIT) + { + operands[5] = gen_rtx_REG (SImode, 2); + operands[4] = operands[5]; + } + else + { + operands[5] = gen_rtx_REG (SImode, 31); + operands[4] = gen_reg_rtx (SImode); + } + operands[3] = gen_reg_rtx (SImode); +}") + +(define_insn "" + [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_operand:SI 0 "register_operand" "=a")) + (clobber (match_operand:SI 1 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 31))] + "!TARGET_64BIT" + "* + return output_mod_insn (0, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_insn "" + [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_operand:SI 0 "register_operand" "=a")) + (clobber (match_operand:SI 1 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 2))] + "TARGET_64BIT" + "* + return output_mod_insn (0, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_expand "umodsi3" + [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" "")) + (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" "")) + 
(parallel [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (match_dup 5))]) + (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))] + "" + " +{ + if (TARGET_64BIT) + { + operands[5] = gen_rtx_REG (SImode, 2); + operands[4] = operands[5]; + } + else + { + operands[5] = gen_rtx_REG (SImode, 31); + operands[4] = gen_reg_rtx (SImode); + } + operands[3] = gen_reg_rtx (SImode); +}") + +(define_insn "" + [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_operand:SI 0 "register_operand" "=a")) + (clobber (match_operand:SI 1 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 31))] + "!TARGET_64BIT" + "* + return output_mod_insn (1, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +(define_insn "" + [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25))) + (clobber (match_operand:SI 0 "register_operand" "=a")) + (clobber (match_operand:SI 1 "register_operand" "=&r")) + (clobber (reg:SI 26)) + (clobber (reg:SI 25)) + (clobber (reg:SI 2))] + "TARGET_64BIT" + "* + return output_mod_insn (1, insn);" + [(set_attr "type" "milli") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_millicode_call (insn)")))]) + +;;- and instructions +;; We define DImode `and` so with DImode `not` we can get +;; DImode `andn`. Other combinations are possible. + +(define_expand "anddi3" + [(set (match_operand:DI 0 "register_operand" "") + (and:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "and_operand" "")))] + "" + " +{ + /* Both operands must be register operands. */ + if (!TARGET_64BIT && !register_operand (operands[2], DImode)) + FAIL; +}") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "register_operand" "r")))] + "!TARGET_64BIT" + "and %1,%2,%0\;and %R1,%R2,%R0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (and:DI (match_operand:DI 1 "register_operand" "%?r,0") + (match_operand:DI 2 "and_operand" "rO,P")))] + "TARGET_64BIT" + "* return output_64bit_and (operands); " + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +; The ? for op1 makes reload prefer zdepi instead of loading a huge +; constant with ldil;ldo. 
+(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (and:SI (match_operand:SI 1 "register_operand" "%?r,0") + (match_operand:SI 2 "and_operand" "rO,P")))] + "" + "* return output_and (operands); " + [(set_attr "type" "binary,shift") + (set_attr "length" "4,4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r")))] + "!TARGET_64BIT" + "andcm %2,%1,%0\;andcm %R2,%R1,%R0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r")))] + "TARGET_64BIT" + "andcm %2,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r")))] + "" + "andcm %2,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_expand "iordi3" + [(set (match_operand:DI 0 "register_operand" "") + (ior:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_cint_ior_operand" "")))] + "" + " +{ + /* Both operands must be register operands. */ + if (!TARGET_64BIT && !register_operand (operands[2], DImode)) + FAIL; +}") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "register_operand" "r")))] + "!TARGET_64BIT" + "or %1,%2,%0\;or %R1,%R2,%R0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ior:DI (match_operand:DI 1 "register_operand" "0,0") + (match_operand:DI 2 "cint_ior_operand" "M,i")))] + "TARGET_64BIT" + "* return output_64bit_ior (operands); " + [(set_attr "type" "binary,shift") + (set_attr "length" "4,4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "register_operand" "r")))] + "TARGET_64BIT" + "or %1,%2,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +;; Need a define_expand because we've run out of CONST_OK... characters. 
+(define_expand "iorsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ior:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "reg_or_cint_ior_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "cint_ior_operand" "M,i")))] + "" + "* return output_ior (operands); " + [(set_attr "type" "binary,shift") + (set_attr "length" "4,4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "or %1,%2,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_expand "xordi3" + [(set (match_operand:DI 0 "register_operand" "") + (xor:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")))] + "" + " +{ +}") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "register_operand" "r")))] + "!TARGET_64BIT" + "xor %1,%2,%0\;xor %R1,%R2,%R0" + [(set_attr "type" "binary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "register_operand" "r")))] + "TARGET_64BIT" + "xor %1,%2,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "xor %1,%2,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_expand "negdi2" + [(set (match_operand:DI 0 "register_operand" "") + (neg:DI (match_operand:DI 1 "register_operand" "")))] + "" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r")))] + "!TARGET_64BIT" + "sub %%r0,%R1,%R0\;{subb|sub,b} %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" + "sub %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr "length" "4")]) + +(define_expand "negvdi2" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (neg:DI (match_operand:DI 1 "register_operand" ""))) + (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1))) + (sign_extend:TI (neg:DI (match_dup 1)))) + (const_int 0))])] + "" + "") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r"))) + (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1))) + (sign_extend:TI (neg:DI (match_dup 1)))) + (const_int 0))] + "!TARGET_64BIT" + "sub %%r0,%R1,%R0\;{subbo|sub,b,tsv} %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r"))) + (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1))) + (sign_extend:TI (neg:DI (match_dup 1)))) + (const_int 0))] + "TARGET_64BIT" + "sub,tsv %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr "length" "4")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "sub %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr 
"length" "4")]) + +(define_insn "negvsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "register_operand" "r"))) + (trap_if (ne (neg:DI (sign_extend:DI (match_dup 1))) + (sign_extend:DI (neg:SI (match_dup 1)))) + (const_int 0))] + "" + "{subo|sub,tsv} %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr "length" "4")]) + +(define_expand "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "") + (not:DI (match_operand:DI 1 "register_operand" "")))] + "" + " +{ +}") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "register_operand" "r")))] + "!TARGET_64BIT" + "uaddcm %%r0,%1,%0\;uaddcm %%r0,%R1,%R0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" + "uaddcm %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr "length" "4")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "uaddcm %%r0,%1,%0" + [(set_attr "type" "unary") + (set_attr "length" "4")]) + +;; Floating point arithmetic instructions. + +(define_insn "adddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fadd,dbl %1,%2,%0" + [(set_attr "type" "fpalu") + (set_attr "pa_combine_type" "faddsub") + (set_attr "length" "4")]) + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fadd,sgl %1,%2,%0" + [(set_attr "type" "fpalu") + (set_attr "pa_combine_type" "faddsub") + (set_attr "length" "4")]) + +(define_insn "subdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fsub,dbl %1,%2,%0" + [(set_attr "type" "fpalu") + (set_attr "pa_combine_type" "faddsub") + (set_attr "length" "4")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fsub,sgl %1,%2,%0" + [(set_attr "type" "fpalu") + (set_attr "pa_combine_type" "faddsub") + (set_attr "length" "4")]) + +(define_insn "muldf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fmpy,dbl %1,%2,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "pa_combine_type" "fmpy") + (set_attr "length" "4")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fmpy,sgl %1,%2,%0" + [(set_attr "type" "fpmulsgl") + (set_attr "pa_combine_type" "fmpy") + (set_attr "length" "4")]) + +(define_insn "divdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")))] + "! 
TARGET_SOFT_FLOAT" + "fdiv,dbl %1,%2,%0" + [(set_attr "type" "fpdivdbl") + (set_attr "length" "4")]) + +(define_insn "divsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fdiv,sgl %1,%2,%0" + [(set_attr "type" "fpdivsgl") + (set_attr "length" "4")]) + +;; Processors prior to PA 2.0 don't have a fneg instruction. Fast +;; negation can be done by subtracting from plus zero. However, this +;; violates the IEEE standard when negating plus and minus zero. +;; The slow path toggles the sign bit in the general registers. +(define_expand "negdf2" + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "!TARGET_SOFT_FLOAT" +{ + if (TARGET_PA_20 || !flag_signed_zeros) + emit_insn (gen_negdf2_fast (operands[0], operands[1])); + else + emit_insn (gen_negdf2_slow (operands[0], operands[1])); + DONE; +}) + +(define_insn "negdf2_slow" + [(set (match_operand:DF 0 "register_operand" "=r") + (neg:DF (match_operand:DF 1 "register_operand" "r")))] + "!TARGET_SOFT_FLOAT && !TARGET_PA_20" + "* +{ + if (rtx_equal_p (operands[0], operands[1])) + return \"and,< %1,%1,%0\;depi,tr 1,0,1,%0\;depi 0,0,1,%0\"; + else + return \"and,< %1,%1,%0\;depi,tr 1,0,1,%0\;depi 0,0,1,%0\;copy %R1,%R0\"; +}" + [(set_attr "type" "multi") + (set (attr "length") + (if_then_else (ne (symbol_ref "rtx_equal_p (operands[0], operands[1])") + (const_int 0)) + (const_int 12) + (const_int 16)))]) + +(define_insn "negdf2_fast" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (match_operand:DF 1 "register_operand" "f")))] + "!TARGET_SOFT_FLOAT" + "* +{ + if (TARGET_PA_20) + return \"fneg,dbl %1,%0\"; + else + return \"fsub,dbl %%fr0,%1,%0\"; +}" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_expand "negsf2" + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" "")))] + "!TARGET_SOFT_FLOAT" +{ + if (TARGET_PA_20 || !flag_signed_zeros) + emit_insn (gen_negsf2_fast (operands[0], operands[1])); + else + emit_insn (gen_negsf2_slow (operands[0], operands[1])); + DONE; +}) + +(define_insn "negsf2_slow" + [(set (match_operand:SF 0 "register_operand" "=r") + (neg:SF (match_operand:SF 1 "register_operand" "r")))] + "!TARGET_SOFT_FLOAT && !TARGET_PA_20" + "and,< %1,%1,%0\;depi,tr 1,0,1,%0\;depi 0,0,1,%0" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "negsf2_fast" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "f")))] + "!TARGET_SOFT_FLOAT" + "* +{ + if (TARGET_PA_20) + return \"fneg,sgl %1,%0\"; + else + return \"fsub,sgl %%fr0,%1,%0\"; +}" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "absdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (match_operand:DF 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fabs,dbl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fabs,sgl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "sqrtdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (sqrt:DF (match_operand:DF 1 "register_operand" "f")))] + "! 
TARGET_SOFT_FLOAT" + "fsqrt,dbl %1,%0" + [(set_attr "type" "fpsqrtdbl") + (set_attr "length" "4")]) + +(define_insn "sqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "register_operand" "f")))] + "! TARGET_SOFT_FLOAT" + "fsqrt,sgl %1,%0" + [(set_attr "type" "fpsqrtsgl") + (set_attr "length" "4")]) + +;; PA 2.0 floating point instructions + +; fmpyfadd patterns +(define_insn "fmadf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "TARGET_PA_20 && ! TARGET_SOFT_FLOAT" + "fmpyfadd,dbl %1,%2,%3,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "4")]) + +(define_insn "fmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "TARGET_PA_20 && ! TARGET_SOFT_FLOAT" + "fmpyfadd,sgl %1,%2,%3,%0" + [(set_attr "type" "fpmulsgl") + (set_attr "length" "4")]) + +; fmpynfadd patterns +(define_insn "fnmadf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "TARGET_PA_20 && ! TARGET_SOFT_FLOAT" + "fmpynfadd,dbl %1,%2,%3,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "4")]) + +(define_insn "fnmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "TARGET_PA_20 && ! TARGET_SOFT_FLOAT" + "fmpynfadd,sgl %1,%2,%3,%0" + [(set_attr "type" "fpmulsgl") + (set_attr "length" "4")]) + +; fnegabs patterns +(define_insn "" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "f"))))] + "TARGET_PA_20 && ! TARGET_SOFT_FLOAT" + "fnegabs,dbl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "f"))))] + "TARGET_PA_20 && ! TARGET_SOFT_FLOAT" + "fnegabs,sgl %1,%0" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "f")))) + (set (match_operand:DF 2 "register_operand" "=&f") (abs:DF (match_dup 1)))] + "(! TARGET_SOFT_FLOAT && TARGET_PA_20 + && ! reg_overlap_mentioned_p (operands[2], operands[1]))" + "#" + [(set_attr "type" "fpalu") + (set_attr "length" "8")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "")))) + (set (match_operand:DF 2 "register_operand" "") (abs:DF (match_dup 1)))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + [(set (match_dup 2) (abs:DF (match_dup 1))) + (set (match_dup 0) (neg:DF (abs:DF (match_dup 1))))] + "") + +(define_insn "" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "f")))) + (set (match_operand:SF 2 "register_operand" "=&f") (abs:SF (match_dup 1)))] + "(! TARGET_SOFT_FLOAT && TARGET_PA_20 + && ! 
reg_overlap_mentioned_p (operands[2], operands[1]))" + "#" + [(set_attr "type" "fpalu") + (set_attr "length" "8")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "")))) + (set (match_operand:SF 2 "register_operand" "") (abs:SF (match_dup 1)))] + "! TARGET_SOFT_FLOAT && TARGET_PA_20" + [(set (match_dup 2) (abs:SF (match_dup 1))) + (set (match_dup 0) (neg:SF (abs:SF (match_dup 1))))] + "") + +;; Negating a multiply can be faked by adding zero in a fused multiply-add +;; instruction if we can ignore the sign of zero. +(define_insn "" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f"))))] + "!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros" + "fmpynfadd,dbl %1,%2,%%fr0,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f"))))] + "!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros" + "fmpynfadd,sgl %1,%2,%%fr0,%0" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")))) + (set (match_operand:DF 3 "register_operand" "=&f") + (mult:DF (match_dup 1) (match_dup 2)))] + "(!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros + && ! (reg_overlap_mentioned_p (operands[3], operands[1]) + || reg_overlap_mentioned_p (operands[3], operands[2])))" + "#" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "8")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "register_operand" "")))) + (set (match_operand:DF 3 "register_operand" "") + (mult:DF (match_dup 1) (match_dup 2)))] + "!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros" + [(set (match_dup 3) (mult:DF (match_dup 1) (match_dup 2))) + (set (match_dup 0) (neg:DF (mult:DF (match_dup 1) (match_dup 2))))] + "") + +(define_insn "" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))) + (set (match_operand:SF 3 "register_operand" "=&f") + (mult:SF (match_dup 1) (match_dup 2)))] + "(!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros + && ! (reg_overlap_mentioned_p (operands[3], operands[1]) + || reg_overlap_mentioned_p (operands[3], operands[2])))" + "#" + [(set_attr "type" "fpmuldbl") + (set_attr "length" "8")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "register_operand" "")))) + (set (match_operand:SF 3 "register_operand" "") + (mult:SF (match_dup 1) (match_dup 2)))] + "!TARGET_SOFT_FLOAT && TARGET_PA_20&& !flag_signed_zeros" + [(set (match_dup 3) (mult:SF (match_dup 1) (match_dup 2))) + (set (match_dup 0) (neg:SF (mult:SF (match_dup 1) (match_dup 2))))] + "") + +;;- Shift instructions + +;; Optimized special case of shifting. 
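The first two patterns below recognize a logical right shift by 24 or 16 of a word loaded from memory and implement it as a zero-extending byte or halfword load: on a big-endian machine such as PA-RISC the most significant byte of the word is the first byte in memory, so ldb/ldh give exactly the shifted result. A small C sketch of the equivalence (illustration only; the helper names are made up):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static uint32_t
top_byte_via_shift (const uint32_t *p)
{
  return *p >> 24;              /* (lshiftrt (mem) (const_int 24)), as matched below */
}

static uint32_t
top_byte_via_load (const uint32_t *p)
{
  uint8_t b;
  memcpy (&b, p, 1);            /* what ldb does: load the first byte, zero-extended */
  return b;
}

int
main (void)
{
  uint32_t w = 0x12345678;
  printf ("shift: %#x  load: %#x\n",
          (unsigned) top_byte_via_shift (&w), (unsigned) top_byte_via_load (&w));
  /* On a big-endian target such as PA-RISC both print 0x12, which is why
     the shift can be implemented as ldb; on a little-endian host the load
     gives 0x78 instead.  */
  return 0;
}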
+ +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m") + (const_int 24)))] + "" + "ldb%M1 %1,%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m") + (const_int 16)))] + "" + "ldh%M1 %1,%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "shadd_operand" "")) + (match_operand:SI 1 "register_operand" "r")))] + "" + "{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0} " + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (mult:DI (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "shadd_operand" "")) + (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" + "shladd,l %2,%O3,%1,%0" + [(set_attr "type" "binary") + (set_attr "length" "4")]) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "lhs_lshift_operand" "") + (match_operand:SI 2 "arith32_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (temp, GEN_INT (31), operands[2])); + if (GET_CODE (operands[1]) == CONST_INT) + emit_insn (gen_zvdep_imm32 (operands[0], operands[1], temp)); + else + emit_insn (gen_zvdep32 (operands[0], operands[1], temp)); + DONE; + } + /* Make sure both inputs are not constants, + there are no patterns for that. */ + operands[1] = force_reg (SImode, operands[1]); +}") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")))] + "" + "{zdep|depw,z} %1,%P2,%L2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +; Match cases of op1 a CONST_INT here that zvdep_imm32 doesn't handle. +; Doing it like this makes slightly better code since reload can +; replace a register with a known value in range -16..15 with a +; constant. Ideally, we would like to merge zvdep32 and zvdep_imm32, +; but since we have no more CONST_OK... characters, that is not +; possible. 
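When the value being shifted is a constant of the right shape, the zvdep_imm32 pattern below recodes it as a field length plus a negative 5-bit immediate and lets the variable deposit sign-extend that immediate through the field. A worked sketch of the recoding (assumption: the constant has a run of ones immediately above its low nibble, which is what the exact_log2 call in the output template requires; sketch only, not the port's code):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

/* For x with (x >> 4) == 2^k - 1, zvdep_imm32 emits
       zvdepi imm,len,%0     ({depwi,z imm,%sar,len,%0} in PA 2.0 syntax)
   with len = 4 + k and imm = (x & 0xf) - 16, a negative 5-bit value whose
   sign-extension through the len-bit field reproduces x.  */
int
main (void)
{
  uint32_t x = 0x3a;                    /* 0b111010: two ones above low nibble 0b1010 */
  uint32_t hi = x >> 4;

  assert ((hi & (hi + 1)) == 0);        /* the shape the pattern accepts */

  int k = 0;                            /* number of ones above bit 3 */
  while ((hi >> k) != 0)
    k++;

  int len = 4 + k;                      /* field length given to zvdepi */
  int imm = (int) (x & 0xf) - 0x10;     /* 5-bit signed immediate, always negative */

  /* Sign-extend imm through a len-bit field, as the hardware deposit does.  */
  uint32_t field = (uint32_t) imm & ((len < 32) ? ((1u << len) - 1) : 0xffffffffu);

  printf ("x=%#x  len=%d  imm=%d  reconstructed=%#x\n",
          (unsigned) x, len, imm, (unsigned) field);
  assert (field == x);                  /* the deposit recreates the constant */
  return 0;
}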
+(define_insn "zvdep32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "arith5_operand" "r,L") + (minus:SI (const_int 31) + (match_operand:SI 2 "register_operand" "q,q"))))] + "" + "@ + {zvdep %1,32,%0|depw,z %1,%%sar,32,%0} + {zvdepi %1,32,%0|depwi,z %1,%%sar,32,%0}" + [(set_attr "type" "shift,shift") + (set_attr "length" "4,4")]) + +(define_insn "zvdep_imm32" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI (match_operand:SI 1 "lhs_lshift_cint_operand" "") + (minus:SI (const_int 31) + (match_operand:SI 2 "register_operand" "q"))))] + "" + "* +{ + unsigned HOST_WIDE_INT x = UINTVAL (operands[1]); + operands[2] = GEN_INT (4 + exact_log2 ((x >> 4) + 1)); + operands[1] = GEN_INT ((x & 0xf) - 0x10); + return \"{zvdepi %1,%2,%0|depwi,z %1,%%sar,%2,%0}\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "vdepi_ior" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "const_int_operand" "") + (minus:SI (const_int 31) + (match_operand:SI 2 "register_operand" "q"))) + (match_operand:SI 3 "register_operand" "0")))] + ; accept ...0001...1, can this be generalized? + "exact_log2 (INTVAL (operands[1]) + 1) > 0" + "* +{ + HOST_WIDE_INT x = INTVAL (operands[1]); + operands[2] = GEN_INT (exact_log2 (x + 1)); + return \"{vdepi -1,%2,%0|depwi -1,%%sar,%2,%0}\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "vdepi_and" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (rotate:SI (match_operand:SI 1 "const_int_operand" "") + (minus:SI (const_int 31) + (match_operand:SI 2 "register_operand" "q"))) + (match_operand:SI 3 "register_operand" "0")))] + ; this can be generalized...! + "INTVAL (operands[1]) == -2" + "* +{ + HOST_WIDE_INT x = INTVAL (operands[1]); + operands[2] = GEN_INT (exact_log2 ((~x) + 1)); + return \"{vdepi 0,%2,%0|depwi 0,%%sar,%2,%0}\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "lhs_lshift_operand" "") + (match_operand:DI 2 "arith32_operand" "")))] + "TARGET_64BIT" + " +{ + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx temp = gen_reg_rtx (DImode); + emit_insn (gen_subdi3 (temp, GEN_INT (63), operands[2])); + if (GET_CODE (operands[1]) == CONST_INT) + emit_insn (gen_zvdep_imm64 (operands[0], operands[1], temp)); + else + emit_insn (gen_zvdep64 (operands[0], operands[1], temp)); + DONE; + } + /* Make sure both inputs are not constants, + there are no patterns for that. */ + operands[1] = force_reg (DImode, operands[1]); +}") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_64BIT" + "depd,z %1,%p2,%Q2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +; Match cases of op1 a CONST_INT here that zvdep_imm64 doesn't handle. +; Doing it like this makes slightly better code since reload can +; replace a register with a known value in range -16..15 with a +; constant. Ideally, we would like to merge zvdep64 and zvdep_imm64, +; but since we have no more CONST_OK... characters, that is not +; possible. 
+(define_insn "zvdep64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "arith5_operand" "r,L") + (minus:DI (const_int 63) + (match_operand:DI 2 "register_operand" "q,q"))))] + "TARGET_64BIT" + "@ + depd,z %1,%%sar,64,%0 + depdi,z %1,%%sar,64,%0" + [(set_attr "type" "shift,shift") + (set_attr "length" "4,4")]) + +(define_insn "zvdep_imm64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "lhs_lshift_cint_operand" "") + (minus:DI (const_int 63) + (match_operand:DI 2 "register_operand" "q"))))] + "TARGET_64BIT" + "* +{ + unsigned HOST_WIDE_INT x = UINTVAL (operands[1]); + operands[2] = GEN_INT (4 + exact_log2 ((x >> 4) + 1)); + operands[1] = GEN_INT ((x & 0x1f) - 0x20); + return \"depdi,z %1,%%sar,%2,%0\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "const_int_operand" "") + (minus:DI (const_int 63) + (match_operand:DI 2 "register_operand" "q"))) + (match_operand:DI 3 "register_operand" "0")))] + ; accept ...0001...1, can this be generalized? + "TARGET_64BIT && exact_log2 (INTVAL (operands[1]) + 1) > 0" + "* +{ + HOST_WIDE_INT x = INTVAL (operands[1]); + operands[2] = GEN_INT (exact_log2 (x + 1)); + return \"depdi -1,%%sar,%2,%0\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (rotate:DI (match_operand:DI 1 "const_int_operand" "") + (minus:DI (const_int 63) + (match_operand:DI 2 "register_operand" "q"))) + (match_operand:DI 3 "register_operand" "0")))] + ; this can be generalized...! + "TARGET_64BIT && INTVAL (operands[1]) == -2" + "* +{ + HOST_WIDE_INT x = INTVAL (operands[1]); + operands[2] = GEN_INT (exact_log2 ((~x) + 1)); + return \"depdi 0,%%sar,%2,%0\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arith32_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (temp, GEN_INT (31), operands[2])); + emit_insn (gen_vextrs32 (operands[0], operands[1], temp)); + DONE; + } +}") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")))] + "" + "{extrs|extrw,s} %1,%P2,%L2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "vextrs32" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") + (minus:SI (const_int 31) + (match_operand:SI 2 "register_operand" "q"))))] + "" + "{vextrs %1,32,%0|extrw,s %1,%%sar,32,%0}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "arith32_operand" "")))] + "TARGET_64BIT" + " +{ + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx temp = gen_reg_rtx (DImode); + emit_insn (gen_subdi3 (temp, GEN_INT (63), operands[2])); + emit_insn (gen_vextrs64 (operands[0], operands[1], temp)); + DONE; + } +}") + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 
"const_int_operand" "n")))] + "TARGET_64BIT" + "extrd,s %1,%p2,%Q2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "vextrs64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (minus:DI (const_int 63) + (match_operand:DI 2 "register_operand" "q"))))] + "TARGET_64BIT" + "extrd,s %1,%%sar,64,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith32_operand" "q,n")))] + "" + "@ + {vshd %%r0,%1,%0|shrpw %%r0,%1,%%sar,%0} + {extru|extrw,u} %1,%P2,%L2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,r") + (match_operand:DI 2 "arith32_operand" "q,n")))] + "TARGET_64BIT" + "@ + shrpd %%r0,%1,%%sar,%0 + extrd,u %1,%p2,%Q2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith32_operand" "q,n")))] + "" + "* +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + operands[2] = GEN_INT (INTVAL (operands[2]) & 31); + return \"{shd|shrpw} %1,%1,%2,%0\"; + } + else + return \"{vshd %1,%1,%0|shrpw %1,%1,%%sar,%0}\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (rotate:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arith32_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (temp, GEN_INT (32), operands[2])); + emit_insn (gen_rotrsi3 (operands[0], operands[1], temp)); + DONE; + } + /* Else expand normally. 
*/ +}") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (rotate:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")))] + "" + "* +{ + operands[2] = GEN_INT ((32 - INTVAL (operands[2])) & 31); + return \"{shd|shrpw} %1,%1,%2,%0\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 5 "plus_xor_ior_operator" + [(ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 4 "const_int_operand" "n"))]))] + "INTVAL (operands[3]) + INTVAL (operands[4]) == 32" + "{shd|shrpw} %1,%2,%4,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 5 "plus_xor_ior_operator" + [(lshiftrt:SI (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 4 "const_int_operand" "n")) + (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n"))]))] + "INTVAL (operands[3]) + INTVAL (operands[4]) == 32" + "{shd|shrpw} %1,%2,%4,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")))] + "exact_log2 (1 + (INTVAL (operands[3]) >> (INTVAL (operands[2]) & 31))) > 0" + "* +{ + int cnt = INTVAL (operands[2]) & 31; + operands[3] = GEN_INT (exact_log2 (1 + (INTVAL (operands[3]) >> cnt))); + operands[2] = GEN_INT (31 - cnt); + return \"{zdep|depw,z} %1,%2,%3,%0\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +;; Unconditional and other jump instructions. + +;; Trivial return used when no epilogue is needed. +(define_insn "return" + [(return) + (use (reg:SI 2))] + "pa_can_use_return_insn ()" + "* +{ + if (TARGET_PA_20) + return \"bve%* (%%r2)\"; + return \"bv%* %%r0(%%r2)\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;; This is used for most returns. +(define_insn "return_internal" + [(return) + (use (reg:SI 2))] + "" + "* +{ + if (TARGET_PA_20) + return \"bve%* (%%r2)\"; + return \"bv%* %%r0(%%r2)\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;; This is used for eh returns which bypass the return stub. +(define_insn "return_external_pic" + [(return) + (clobber (reg:SI 1)) + (use (reg:SI 2))] + "!TARGET_NO_SPACE_REGS + && !TARGET_PA_20 + && flag_pic && crtl->calls_eh_return" + "ldsid (%%sr0,%%r2),%%r1\;mtsp %%r1,%%sr0\;be%* 0(%%sr0,%%r2)" + [(set_attr "type" "branch") + (set_attr "length" "12")]) + +(define_expand "prologue" + [(const_int 0)] + "" + "hppa_expand_prologue ();DONE;") + +(define_expand "sibcall_epilogue" + [(return)] + "" + " +{ + hppa_expand_epilogue (); + DONE; +}") + +(define_expand "epilogue" + [(return)] + "" + " +{ + rtx x; + + /* Try to use the trivial return first. Else use the full epilogue. */ + if (pa_can_use_return_insn ()) + x = gen_return (); + else + { + hppa_expand_epilogue (); + + /* EH returns bypass the normal return stub. Thus, we must do an + interspace branch to return from functions that call eh_return. + This is only a problem for returns from shared code on ports + using space registers. 
*/ + if (!TARGET_NO_SPACE_REGS + && !TARGET_PA_20 + && flag_pic && crtl->calls_eh_return) + x = gen_return_external_pic (); + else + x = gen_return_internal (); + } + emit_jump_insn (x); + DONE; +}") + +; Used by hppa_profile_hook to load the starting address of the current +; function; operand 1 contains the address of the label in operand 3 +(define_insn "load_offset_label_address" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (minus:SI (match_operand:SI 2 "" "") + (label_ref:SI (match_operand 3 "" "")))))] + "" + "ldo %2-%l3(%1),%0" + [(set_attr "type" "multi") + (set_attr "length" "4")]) + +; Output a code label and load its address. +(define_insn "lcla1" + [(set (match_operand:SI 0 "register_operand" "=r") + (label_ref:SI (match_operand 1 "" ""))) + (const_int 0)] + "!TARGET_PA_20" + "* +{ + output_asm_insn (\"bl .+8,%0\;depi 0,31,2,%0\", operands); + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", + CODE_LABEL_NUMBER (operands[1])); + return \"\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "lcla2" + [(set (match_operand:SI 0 "register_operand" "=r") + (label_ref:SI (match_operand 1 "" ""))) + (const_int 0)] + "TARGET_PA_20" + "* +{ + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", + CODE_LABEL_NUMBER (operands[1])); + return \"mfia %0\"; +}" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn "blockage" + [(unspec_volatile [(const_int 2)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "* +{ + /* An unconditional branch which can reach its target. */ + if (get_attr_length (insn) < 16) + return \"b%* %l0\"; + + return output_lbranch (operands[0], insn, 1); +}" + [(set_attr "type" "uncond_branch") + (set_attr "pa_combine_type" "uncond_branch") + (set (attr "length") + (cond [(eq (symbol_ref "jump_in_call_delay (insn)") (const_int 1)) + (if_then_else (lt (abs (minus (match_dup 0) + (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (const_int 8)) + (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 4) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 20) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 16)] + (const_int 24)))]) + +;;; Hope this is only within a function... +(define_insn "indirect_jump" + [(set (pc) (match_operand 0 "register_operand" "r"))] + "GET_MODE (operands[0]) == word_mode" + "bv%* %%r0(%0)" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;;; An indirect jump can be optimized to a direct jump. GAS for the +;;; SOM target doesn't allow branching to a label inside a function. +;;; We also don't correctly compute branch distances for labels +;;; outside the current function. Thus, we use an indirect jump can't +;;; be optimized to a direct jump for all targets. We assume that +;;; the branch target is in the same space (i.e., nested function +;;; jumping to a label in an outer function in the same translation +;;; unit). 
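That situation is what feeds the nonlocal_goto expander below: in practice it arises from the GNU C nested-function extension, where a nested function jumps to a label of its containing function and the containing function's frame and stack pointers have to be restored before branching. A minimal sketch (GNU C extension, compiles with gcc; not ISO C):

#include <stdio.h>

static int
first_negative (const int *a, int n)
{
  __label__ bail;               /* make the label visible to the nested function */
  int i;

  void check (int v)            /* nested function */
  {
    if (v < 0)
      goto bail;                /* nonlocal goto back into first_negative's frame */
  }

  for (i = 0; i < n; i++)
    check (a[i]);
  return -1;

 bail:
  return i;
}

int
main (void)
{
  int a[] = { 3, 7, -2, 9 };
  printf ("%d\n", first_negative (a, 4));   /* prints 2 */
  return 0;
}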
+(define_expand "nonlocal_goto" + [(use (match_operand 0 "general_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" "")) + (use (match_operand 3 "general_operand" ""))] + "" +{ + rtx lab = operands[1]; + rtx stack = operands[2]; + rtx fp = operands[3]; + + lab = copy_to_reg (lab); + + emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); + emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); + + /* Restore the frame pointer. The virtual_stack_vars_rtx is saved + instead of the hard_frame_pointer_rtx in the save area. As a + result, an extra instruction is needed to adjust for the offset + of the virtual stack variables and the hard frame pointer. */ + if (GET_CODE (fp) != REG) + fp = force_reg (Pmode, fp); + emit_move_insn (hard_frame_pointer_rtx, plus_constant (fp, -8)); + + emit_stack_restore (SAVE_NONLOCAL, stack); + + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + /* Nonlocal goto jumps are only used between functions in the same + translation unit. Thus, we can avoid the extra overhead of an + interspace jump. */ + emit_jump_insn (gen_indirect_goto (lab)); + emit_barrier (); + DONE; +}) + +(define_insn "indirect_goto" + [(unspec [(match_operand 0 "register_operand" "=r")] UNSPEC_GOTO)] + "GET_MODE (operands[0]) == word_mode" + "bv%* %%r0(%0)" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;;; This jump is used in branch tables where the insn length is fixed. +;;; The length of this insn is adjusted if the delay slot is not filled. +(define_insn "short_jump" + [(set (pc) (label_ref (match_operand 0 "" ""))) + (const_int 0)] + "" + "b%* %l0%#" + [(set_attr "type" "btable_branch") + (set_attr "length" "4")]) + +;; Subroutines of "casesi". +;; operand 0 is index +;; operand 1 is the minimum bound +;; operand 2 is the maximum bound - minimum bound + 1 +;; operand 3 is CODE_LABEL for the table; +;; operand 4 is the CODE_LABEL to go to if index out of range. + +(define_expand "casesi" + [(match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand 3 "" "") + (match_operand 4 "" "")] + "" + " +{ + if (GET_CODE (operands[0]) != REG) + operands[0] = force_reg (SImode, operands[0]); + + if (operands[1] != const0_rtx) + { + rtx index = gen_reg_rtx (SImode); + + operands[1] = gen_int_mode (-INTVAL (operands[1]), SImode); + if (!INT_14_BITS (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_addsi3 (index, operands[0], operands[1])); + operands[0] = index; + } + + if (!INT_5_BITS (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + + /* This branch prevents us finding an insn for the delay slot of the + following vectored branch. It might be possible to use the delay + slot if an index value of -1 was used to transfer to the out-of-range + label. In order to do this, we would have to output the -1 vector + element after the delay insn. The casesi output code would have to + check if the casesi insn is in a delay branch sequence and output + the delay insn if one is found. If this was done, then it might + then be worthwhile to split the casesi patterns to improve scheduling. + However, it's not clear that all this extra complexity is worth + the effort. 
*/ + { + rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]); + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4])); + } + + /* In 64bit mode we must make sure to wipe the upper bits of the register + just in case the addition overflowed or we had random bits in the + high part of the register. */ + if (TARGET_64BIT) + { + rtx index = gen_reg_rtx (DImode); + + emit_insn (gen_extendsidi2 (index, operands[0])); + operands[0] = index; + } + + if (TARGET_BIG_SWITCH) + { + if (TARGET_64BIT) + emit_jump_insn (gen_casesi64p (operands[0], operands[3])); + else if (flag_pic) + emit_jump_insn (gen_casesi32p (operands[0], operands[3])); + else + emit_jump_insn (gen_casesi32 (operands[0], operands[3])); + } + else + emit_jump_insn (gen_casesi0 (operands[0], operands[3])); + DONE; +}") + +;;; The rtl for this pattern doesn't accurately describe what the insn +;;; actually does, particularly when case-vector elements are exploded +;;; in pa_reorg. However, the initial SET in these patterns must show +;;; the connection of the insn to the following jump table. +(define_insn "casesi0" + [(set (pc) (mem:SI (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r") + (const_int 4)) + (label_ref (match_operand 1 "" "")))))] + "" + "blr,n %0,%%r0\;nop" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +;;; 32-bit code, absolute branch table. +(define_insn "casesi32" + [(set (pc) (mem:SI (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r") + (const_int 4)) + (label_ref (match_operand 1 "" ""))))) + (clobber (match_scratch:SI 2 "=&r"))] + "!flag_pic" + "ldil L'%l1,%2\;ldo R'%l1(%2),%2\;{ldwx|ldw},s %0(%2),%2\;bv,n %%r0(%2)" + [(set_attr "type" "multi") + (set_attr "length" "16")]) + +;;; 32-bit code, relative branch table. +(define_insn "casesi32p" + [(set (pc) (mem:SI (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r") + (const_int 4)) + (label_ref (match_operand 1 "" ""))))) + (clobber (match_scratch:SI 2 "=&r")) + (clobber (match_scratch:SI 3 "=&r"))] + "flag_pic" + "{bl .+8,%2\;depi 0,31,2,%2|mfia %2}\;ldo {%l1-.|%l1+4-.}(%2),%2\;\ +{ldwx|ldw},s %0(%2),%3\;{addl|add,l} %2,%3,%3\;bv,n %%r0(%3)" + [(set_attr "type" "multi") + (set (attr "length") + (if_then_else (ne (symbol_ref "TARGET_PA_20") (const_int 0)) + (const_int 20) + (const_int 24)))]) + +;;; 64-bit code, 32-bit relative branch table. +(define_insn "casesi64p" + [(set (pc) (mem:DI (plus:DI + (mult:DI (match_operand:DI 0 "register_operand" "r") + (const_int 8)) + (label_ref (match_operand 1 "" ""))))) + (clobber (match_scratch:DI 2 "=&r")) + (clobber (match_scratch:DI 3 "=&r"))] + "" + "mfia %2\;ldo %l1+4-.(%2),%2\;ldw,s %0(%2),%3\;extrd,s %3,63,32,%3\;\ +add,l %2,%3,%3\;bv,n %%r0(%3)" + [(set_attr "type" "multi") + (set_attr "length" "24")]) + + +;; Call patterns. +;;- jump to subroutine + +(define_expand "call" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (clobber (reg:SI 2))])] + "" + " +{ + rtx op; + rtx nb = operands[1]; + + if (TARGET_PORTABLE_RUNTIME) + op = force_reg (SImode, XEXP (operands[0], 0)); + else + op = XEXP (operands[0], 0); + + if (TARGET_64BIT) + { + if (!virtuals_instantiated) + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + else + { + /* The loop pass can generate new libcalls after the virtual + registers are instantiated when fpregs are disabled because + the only method that we have for doing DImode multiplication + is with a libcall. 
This could be trouble if we haven't + allocated enough space for the outgoing arguments. */ + gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size); + + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, stack_pointer_rtx, + GEN_INT (STACK_POINTER_OFFSET + 64))); + } + } + + /* Use two different patterns for calls to explicitly named functions + and calls through function pointers. This is necessary as these two + types of calls use different calling conventions, and CSE might try + to change the named call into an indirect call in some cases (using + two patterns keeps CSE from performing this optimization). + + We now use even more call patterns as there was a subtle bug in + attempting to restore the pic register after a call using a simple + move insn. During reload, a instruction involving a pseudo register + with no explicit dependence on the PIC register can be converted + to an equivalent load from memory using the PIC register. If we + emit a simple move to restore the PIC register in the initial rtl + generation, then it can potentially be repositioned during scheduling. + and an instruction that eventually uses the PIC register may end up + between the call and the PIC register restore. + + This only worked because there is a post call group of instructions + that are scheduled with the call. These instructions are included + in the same basic block as the call. However, calls can throw in + C++ code and a basic block has to terminate at the call if the call + can throw. This results in the PIC register restore being scheduled + independently from the call. So, we now hide the save and restore + of the PIC register in the call pattern until after reload. Then, + we split the moves out. A small side benefit is that we now don't + need to have a use of the PIC register in the return pattern and + the final save/restore operation is not needed. + + I elected to just use register %r4 in the PIC patterns instead + of trying to force hppa_pic_save_rtx () to a callee saved register. + This might have required a new register class and constraint. It + was also simpler to just handle the restore from a register than a + generic pseudo. */ + if (TARGET_64BIT) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + if (GET_CODE (op) == SYMBOL_REF) + emit_call_insn (gen_call_symref_64bit (op, nb, r4)); + else + { + op = force_reg (word_mode, op); + emit_call_insn (gen_call_reg_64bit (op, nb, r4)); + } + } + else + { + if (GET_CODE (op) == SYMBOL_REF) + { + if (flag_pic) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_call_insn (gen_call_symref_pic (op, nb, r4)); + } + else + emit_call_insn (gen_call_symref (op, nb)); + } + else + { + rtx tmpreg = gen_rtx_REG (word_mode, 22); + emit_move_insn (tmpreg, force_reg (word_mode, op)); + if (flag_pic) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_call_insn (gen_call_reg_pic (nb, r4)); + } + else + emit_call_insn (gen_call_reg (nb)); + } + } + + DONE; +}") + +;; We use function calls to set the attribute length of calls and millicode +;; calls. This is necessary because of the large variety of call sequences. +;; Implementing the calculation in rtl is difficult as well as ugly. As +;; we need the same calculation in several places, maintenance becomes a +;; nightmare. +;; +;; However, this has a subtle impact on branch shortening. 
When the +;; expression used to set the length attribute of an instruction depends +;; on a relative address (e.g., pc or a branch address), genattrtab +;; notes that the insn's length is variable, and attempts to determine a +;; worst-case default length and code to compute an insn's current length. + +;; The use of a function call hides the variable dependence of our calls +;; and millicode calls. The result is genattrtab doesn't treat the operation +;; as variable and it only generates code for the default case using our +;; function call. Because of this, calls and millicode calls have a fixed +;; length in the branch shortening pass, and some branches will use a longer +;; code sequence than necessary. However, the length of any given call +;; will still reflect its final code location and it may be shorter than +;; the initial length estimate. + +;; It's possible to trick genattrtab by adding an expression involving `pc' +;; in the set. However, when genattrtab hits a function call in its attempt +;; to compute the default length, it marks the result as unknown and sets +;; the default result to MAX_INT ;-( One possible fix that would allow +;; calls to participate in branch shortening would be to make the call to +;; insn_default_length a target option. Then, we could massage unknown +;; results. Another fix might be to change genattrtab so that it just does +;; the call in the variable case as it already does for the fixed case. + +(define_insn "call_symref" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[0], 0); +}" + [(set_attr "type" "call") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 0)")))]) + +(define_insn "call_symref_pic" + [(set (match_operand:SI 2 "register_operand" "=&r") (reg:SI 19)) + (call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 2)) + (use (reg:SI 19)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
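Before the splits themselves, a side note on why the PIC register needs this treatment at all (background, as I understand the 32-bit conventions, not something spelled out in the port): under -fPIC, global data is reached through the linkage table addressed from the PIC base (%r19 in 32-bit mode), and a call sequence can leave a different value there, so any later global access needs the base restored first. A tiny C illustration; compiling it with -fPIC for an hppa target should show the %r19 save/restore traffic around the call (illustration only):

#include <stdio.h>

int counter = 0;                 /* global, default visibility: with -fPIC it is
                                    addressed through the linkage table           */

void do_work (void) { counter += 2; }   /* call that may go through a stub        */

int
bump_and_read (void)
{
  counter++;                     /* needs the PIC base to find `counter'          */
  do_work ();                    /* the call may clobber the PIC base             */
  return counter;                /* the PIC base must be valid again here         */
}

int
main (void)
{
  printf ("%d\n", bump_and_read ());   /* prints 3 */
  return 0;
}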
+(define_split + [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19)) + (call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 2)) + (use (reg:SI 19)) + (use (const_int 0))])] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 2) (reg:SI 19)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 0))])] + "") + +(define_split + [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19)) + (call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 2)) + (use (reg:SI 19)) + (use (const_int 0))])] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed" + [(set (match_dup 2) (reg:SI 19)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 0))]) + (set (reg:SI 19) (match_dup 2))] + "") + +(define_insn "*call_symref_pic_post_reload" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[0], 0); +}" + [(set_attr "type" "call") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 0)")))]) + +;; This pattern is split if it is necessary to save and restore the +;; PIC register. +(define_insn "call_symref_64bit" + [(set (match_operand:DI 2 "register_operand" "=&r") (reg:DI 27)) + (call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))] + "TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
+(define_split + [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27)) + (call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))])] + "TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 2) (reg:DI 27)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))])] + "") + +(define_split + [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27)) + (call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))])] + "TARGET_64BIT && reload_completed" + [(set (match_dup 2) (reg:DI 27)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))]) + (set (reg:DI 27) (match_dup 2))] + "") + +(define_insn "*call_symref_64bit_post_reload" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))] + "TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[0], 0); +}" + [(set_attr "type" "call") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 0)")))]) + +(define_insn "call_reg" + [(call (mem:SI (reg:SI 22)) + (match_operand 0 "" "i")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (const_int 1))] + "!TARGET_64BIT" + "* +{ + return output_indirect_call (insn, gen_rtx_REG (word_mode, 22)); +}" + [(set_attr "type" "dyncall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_indirect_call (insn)")))]) + +;; This pattern is split if it is necessary to save and restore the +;; PIC register. +(define_insn "call_reg_pic" + [(set (match_operand:SI 1 "register_operand" "=&r") (reg:SI 19)) + (call (mem:SI (reg:SI 22)) + (match_operand 0 "" "i")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 1)) + (use (reg:SI 19)) + (use (const_int 1))] + "!TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
+(define_split + [(parallel [(set (match_operand:SI 1 "register_operand" "") (reg:SI 19)) + (call (mem:SI (reg:SI 22)) + (match_operand 0 "" "")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 1)) + (use (reg:SI 19)) + (use (const_int 1))])] + "!TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 1) (reg:SI 19)) + (parallel [(call (mem:SI (reg:SI 22)) + (match_dup 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 1))])] + "") + +(define_split + [(parallel [(set (match_operand:SI 1 "register_operand" "") (reg:SI 19)) + (call (mem:SI (reg:SI 22)) + (match_operand 0 "" "")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 1)) + (use (reg:SI 19)) + (use (const_int 1))])] + "!TARGET_64BIT && reload_completed" + [(set (match_dup 1) (reg:SI 19)) + (parallel [(call (mem:SI (reg:SI 22)) + (match_dup 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 1))]) + (set (reg:SI 19) (match_dup 1))] + "") + +(define_insn "*call_reg_pic_post_reload" + [(call (mem:SI (reg:SI 22)) + (match_operand 0 "" "i")) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 1))] + "!TARGET_64BIT" + "* +{ + return output_indirect_call (insn, gen_rtx_REG (word_mode, 22)); +}" + [(set_attr "type" "dyncall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_indirect_call (insn)")))]) + +;; This pattern is split if it is necessary to save and restore the +;; PIC register. +(define_insn "call_reg_64bit" + [(set (match_operand:DI 2 "register_operand" "=&r") (reg:DI 27)) + (call (mem:SI (match_operand:DI 0 "register_operand" "r")) + (match_operand 1 "" "i")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))] + "TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
+(define_split + [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27)) + (call (mem:SI (match_operand 0 "register_operand" "")) + (match_operand 1 "" "")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))])] + "TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 2) (reg:DI 27)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))])] + "") + +(define_split + [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27)) + (call (mem:SI (match_operand 0 "register_operand" "")) + (match_operand 1 "" "")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))])] + "TARGET_64BIT && reload_completed" + [(set (match_dup 2) (reg:DI 27)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))]) + (set (reg:DI 27) (match_dup 2))] + "") + +(define_insn "*call_reg_64bit_post_reload" + [(call (mem:SI (match_operand:DI 0 "register_operand" "r")) + (match_operand 1 "" "i")) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))] + "TARGET_64BIT" + "* +{ + return output_indirect_call (insn, operands[0]); +}" + [(set_attr "type" "dyncall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 12)] + (symbol_ref "attr_length_indirect_call (insn)")))]) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" ""))) + (clobber (reg:SI 2))])] + "" + " +{ + rtx op; + rtx dst = operands[0]; + rtx nb = operands[2]; + + if (TARGET_PORTABLE_RUNTIME) + op = force_reg (SImode, XEXP (operands[1], 0)); + else + op = XEXP (operands[1], 0); + + if (TARGET_64BIT) + { + if (!virtuals_instantiated) + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + else + { + /* The loop pass can generate new libcalls after the virtual + registers are instantiated when fpregs are disabled because + the only method that we have for doing DImode multiplication + is with a libcall. This could be trouble if we haven't + allocated enough space for the outgoing arguments. */ + gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size); + + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, stack_pointer_rtx, + GEN_INT (STACK_POINTER_OFFSET + 64))); + } + } + + /* Use two different patterns for calls to explicitly named functions + and calls through function pointers. This is necessary as these two + types of calls use different calling conventions, and CSE might try + to change the named call into an indirect call in some cases (using + two patterns keeps CSE from performing this optimization). + + We now use even more call patterns as there was a subtle bug in + attempting to restore the pic register after a call using a simple + move insn. During reload, a instruction involving a pseudo register + with no explicit dependence on the PIC register can be converted + to an equivalent load from memory using the PIC register. If we + emit a simple move to restore the PIC register in the initial rtl + generation, then it can potentially be repositioned during scheduling. 
+ and an instruction that eventually uses the PIC register may end up + between the call and the PIC register restore. + + This only worked because there is a post call group of instructions + that are scheduled with the call. These instructions are included + in the same basic block as the call. However, calls can throw in + C++ code and a basic block has to terminate at the call if the call + can throw. This results in the PIC register restore being scheduled + independently from the call. So, we now hide the save and restore + of the PIC register in the call pattern until after reload. Then, + we split the moves out. A small side benefit is that we now don't + need to have a use of the PIC register in the return pattern and + the final save/restore operation is not needed. + + I elected to just use register %r4 in the PIC patterns instead + of trying to force hppa_pic_save_rtx () to a callee saved register. + This might have required a new register class and constraint. It + was also simpler to just handle the restore from a register than a + generic pseudo. */ + if (TARGET_64BIT) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + if (GET_CODE (op) == SYMBOL_REF) + emit_call_insn (gen_call_val_symref_64bit (dst, op, nb, r4)); + else + { + op = force_reg (word_mode, op); + emit_call_insn (gen_call_val_reg_64bit (dst, op, nb, r4)); + } + } + else + { + if (GET_CODE (op) == SYMBOL_REF) + { + if (flag_pic) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_call_insn (gen_call_val_symref_pic (dst, op, nb, r4)); + } + else + emit_call_insn (gen_call_val_symref (dst, op, nb)); + } + else + { + rtx tmpreg = gen_rtx_REG (word_mode, 22); + emit_move_insn (tmpreg, force_reg (word_mode, op)); + if (flag_pic) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_call_insn (gen_call_val_reg_pic (dst, nb, r4)); + } + else + emit_call_insn (gen_call_val_reg (dst, nb)); + } + } + + DONE; +}") + +(define_insn "call_val_symref" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" "i"))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[1], 0); +}" + [(set_attr "type" "call") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 0)")))]) + +(define_insn "call_val_symref_pic" + [(set (match_operand:SI 3 "register_operand" "=&r") (reg:SI 19)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" "i"))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 3)) + (use (reg:SI 19)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
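In concrete terms, the save and restore that these patterns hide until after reload amounts to the sequence below around a 32-bit PIC indirect call. This is only a sketch in portable C, with ordinary variables standing in for the hard registers named by the RTL (r19 is the PIC register, r4 the callee-saved temporary picked by the expander, r22 the register holding the function address):

    typedef void (*fn_t) (void);

    static unsigned long r19;        /* PIC register, (reg:SI 19)           */
    static unsigned long r4;         /* callee-saved scratch, %r4           */

    static void
    call_with_pic_save (fn_t r22)    /* function address, as in (reg:SI 22) */
    {
      r4 = r19;                      /* save: (set (match_dup ...) (reg:SI 19)) */
      r22 ();                        /* the call itself; it may change r19      */
      r19 = r4;                      /* restore: (set (reg:SI 19) (match_dup ...));
                                        omitted for noreturn calls, which
                                        terminate the basic block              */
    }

The CRT_CALL_STATIC_FUNCTION macro in pa32-linux.h, further down in this change, performs the same %r19-to-%r4 save and restore by hand in inline assembly.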
+(define_split + [(parallel [(set (match_operand:SI 3 "register_operand" "") (reg:SI 19)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" ""))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 3)) + (use (reg:SI 19)) + (use (const_int 0))])] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 3) (reg:SI 19)) + (parallel [(set (match_dup 0) + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 0))])] + "") + +(define_split + [(parallel [(set (match_operand:SI 3 "register_operand" "") (reg:SI 19)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" ""))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 3)) + (use (reg:SI 19)) + (use (const_int 0))])] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed" + [(set (match_dup 3) (reg:SI 19)) + (parallel [(set (match_dup 0) + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 0))]) + (set (reg:SI 19) (match_dup 3))] + "") + +(define_insn "*call_val_symref_pic_post_reload" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" "i"))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[1], 0); +}" + [(set_attr "type" "call") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 0)")))]) + +;; This pattern is split if it is necessary to save and restore the +;; PIC register. +(define_insn "call_val_symref_64bit" + [(set (match_operand:DI 3 "register_operand" "=&r") (reg:DI 27)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" "i"))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 3)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))] + "TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
+(define_split + [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" ""))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 3)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))])] + "TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 3) (reg:DI 27)) + (parallel [(set (match_dup 0) + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))])] + "") + +(define_split + [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" ""))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 3)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))])] + "TARGET_64BIT && reload_completed" + [(set (match_dup 3) (reg:DI 27)) + (parallel [(set (match_dup 0) + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))]) + (set (reg:DI 27) (match_dup 3))] + "") + +(define_insn "*call_val_symref_64bit_post_reload" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" "i"))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 0))] + "TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[1], 0); +}" + [(set_attr "type" "call") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 0)")))]) + +(define_insn "call_val_reg" + [(set (match_operand 0 "" "") + (call (mem:SI (reg:SI 22)) + (match_operand 1 "" "i"))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (const_int 1))] + "!TARGET_64BIT" + "* +{ + return output_indirect_call (insn, gen_rtx_REG (word_mode, 22)); +}" + [(set_attr "type" "dyncall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_indirect_call (insn)")))]) + +;; This pattern is split if it is necessary to save and restore the +;; PIC register. +(define_insn "call_val_reg_pic" + [(set (match_operand:SI 2 "register_operand" "=&r") (reg:SI 19)) + (set (match_operand 0 "" "") + (call (mem:SI (reg:SI 22)) + (match_operand 1 "" "i"))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 2)) + (use (reg:SI 19)) + (use (const_int 1))] + "!TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
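The splits that follow handle value-returning calls made through a register. As the call_value expander above explains, calls to named functions and calls through function pointers deliberately use distinct patterns so that CSE cannot rewrite one into the other. In source terms the distinction is simply (hypothetical names):

    extern int f (int);
    static int (*fp) (int) = f;

    int
    call_both (int x)
    {
      /* f (x) expands through the symref patterns; fp (x) is moved into
         %r22 and goes through the "dyncall" patterns above and below.  */
      return f (x) + fp (x);
    }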
+(define_split + [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19)) + (set (match_operand 0 "" "") + (call (mem:SI (reg:SI 22)) + (match_operand 1 "" ""))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 2)) + (use (reg:SI 19)) + (use (const_int 1))])] + "!TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 2) (reg:SI 19)) + (parallel [(set (match_dup 0) + (call (mem:SI (reg:SI 22)) + (match_dup 1))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 1))])] + "") + +(define_split + [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19)) + (set (match_operand 0 "" "") + (call (mem:SI (reg:SI 22)) + (match_operand 1 "" ""))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (match_dup 2)) + (use (reg:SI 19)) + (use (const_int 1))])] + "!TARGET_64BIT && reload_completed" + [(set (match_dup 2) (reg:SI 19)) + (parallel [(set (match_dup 0) + (call (mem:SI (reg:SI 22)) + (match_dup 1))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 1))]) + (set (reg:SI 19) (match_dup 2))] + "") + +(define_insn "*call_val_reg_pic_post_reload" + [(set (match_operand 0 "" "") + (call (mem:SI (reg:SI 22)) + (match_operand 1 "" "i"))) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (use (reg:SI 19)) + (use (const_int 1))] + "!TARGET_64BIT" + "* +{ + return output_indirect_call (insn, gen_rtx_REG (word_mode, 22)); +}" + [(set_attr "type" "dyncall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_indirect_call (insn)")))]) + +;; This pattern is split if it is necessary to save and restore the +;; PIC register. +(define_insn "call_val_reg_64bit" + [(set (match_operand:DI 3 "register_operand" "=&r") (reg:DI 27)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "register_operand" "r")) + (match_operand 2 "" "i"))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 3)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))] + "TARGET_64BIT" + "#") + +;; Split out the PIC register save and restore after reload. As the +;; split is done after reload, there are some situations in which we +;; unnecessarily save and restore %r4. This happens when there is a +;; single call and the PIC register is not used after the call. +;; +;; The split has to be done since call_from_call_insn () can't handle +;; the pattern as is. Noreturn calls are special because they have to +;; terminate the basic block. The split has to contain more than one +;; insn. 
+(define_split + [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "register_operand" "")) + (match_operand 2 "" ""))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 3)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))])] + "TARGET_64BIT && reload_completed + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + [(set (match_dup 3) (reg:DI 27)) + (parallel [(set (match_dup 0) + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))])] + "") + +(define_split + [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27)) + (set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "register_operand" "")) + (match_operand 2 "" ""))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (match_dup 3)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))])] + "TARGET_64BIT && reload_completed" + [(set (match_dup 3) (reg:DI 27)) + (parallel [(set (match_dup 0) + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))]) + (set (reg:DI 27) (match_dup 3))] + "") + +(define_insn "*call_val_reg_64bit_post_reload" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "register_operand" "r")) + (match_operand 2 "" "i"))) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29)) + (use (const_int 1))] + "TARGET_64BIT" + "* +{ + return output_indirect_call (insn, operands[1]); +}" + [(set_attr "type" "dyncall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 12)] + (symbol_ref "attr_length_indirect_call (insn)")))]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" + " +{ + int i; + + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}") + +(define_expand "sibcall" + [(call (match_operand:SI 0 "" "") + (match_operand 1 "" ""))] + "!TARGET_PORTABLE_RUNTIME" + " +{ + rtx op, call_insn; + rtx nb = operands[1]; + + op = XEXP (operands[0], 0); + + if (TARGET_64BIT) + { + if (!virtuals_instantiated) + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + else + { + /* The loop pass can generate new libcalls after the virtual + registers are instantiated when fpregs are disabled because + the only method that we have for doing DImode multiplication + is with a libcall. This could be trouble if we haven't + allocated enough space for the outgoing arguments. */ + gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size); + + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, stack_pointer_rtx, + GEN_INT (STACK_POINTER_OFFSET + 64))); + } + } + + /* Indirect sibling calls are not allowed. 
*/ + if (TARGET_64BIT) + call_insn = gen_sibcall_internal_symref_64bit (op, operands[1]); + else + call_insn = gen_sibcall_internal_symref (op, operands[1]); + + call_insn = emit_call_insn (call_insn); + + if (TARGET_64BIT) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx); + + /* We don't have to restore the PIC register. */ + if (flag_pic) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx); + + DONE; +}") + +(define_insn "sibcall_internal_symref" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (clobber (reg:SI 1)) + (use (reg:SI 2)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[0], 1); +}" + [(set_attr "type" "sibcall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 1)")))]) + +(define_insn "sibcall_internal_symref_64bit" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (clobber (reg:DI 1)) + (use (reg:DI 2)) + (use (const_int 0))] + "TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[0], 1); +}" + [(set_attr "type" "sibcall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 1)")))]) + +(define_expand "sibcall_value" + [(set (match_operand 0 "" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" "")))] + "!TARGET_PORTABLE_RUNTIME" + " +{ + rtx op, call_insn; + rtx nb = operands[1]; + + op = XEXP (operands[1], 0); + + if (TARGET_64BIT) + { + if (!virtuals_instantiated) + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + else + { + /* The loop pass can generate new libcalls after the virtual + registers are instantiated when fpregs are disabled because + the only method that we have for doing DImode multiplication + is with a libcall. This could be trouble if we haven't + allocated enough space for the outgoing arguments. */ + gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size); + + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, stack_pointer_rtx, + GEN_INT (STACK_POINTER_OFFSET + 64))); + } + } + + /* Indirect sibling calls are not allowed. */ + if (TARGET_64BIT) + call_insn + = gen_sibcall_value_internal_symref_64bit (operands[0], op, operands[2]); + else + call_insn + = gen_sibcall_value_internal_symref (operands[0], op, operands[2]); + + call_insn = emit_call_insn (call_insn); + + if (TARGET_64BIT) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx); + + /* We don't have to restore the PIC register. 
*/ + if (flag_pic) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx); + + DONE; +}") + +(define_insn "sibcall_value_internal_symref" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" "i"))) + (clobber (reg:SI 1)) + (use (reg:SI 2)) + (use (const_int 0))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[1], 1); +}" + [(set_attr "type" "sibcall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 1)")))]) + +(define_insn "sibcall_value_internal_symref_64bit" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "call_operand_address" "")) + (match_operand 2 "" "i"))) + (clobber (reg:DI 1)) + (use (reg:DI 2)) + (use (const_int 0))] + "TARGET_64BIT" + "* +{ + output_arg_descriptor (insn); + return output_call (insn, operands[1], 1); +}" + [(set_attr "type" "sibcall") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)] + (symbol_ref "attr_length_call (insn, 1)")))]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +;; These are just placeholders so we know where branch tables +;; begin and end. +(define_insn "begin_brtab" + [(const_int 1)] + "" + "* +{ + /* Only GAS actually supports this pseudo-op. */ + if (TARGET_GAS) + return \".begin_brtab\"; + else + return \"\"; +}" + [(set_attr "type" "move") + (set_attr "length" "0")]) + +(define_insn "end_brtab" + [(const_int 2)] + "" + "* +{ + /* Only GAS actually supports this pseudo-op. */ + if (TARGET_GAS) + return \".end_brtab\"; + else + return \"\"; +}" + [(set_attr "type" "move") + (set_attr "length" "0")]) + +;;; EH does longjmp's from and within the data section. Thus, +;;; an interspace branch is required for the longjmp implementation. +;;; Registers r1 and r2 are used as scratch registers for the jump +;;; when necessary. 
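The builtin_longjmp expander below, which ends in one of these interspace jumps, reads three pointer-sized words from the buffer passed in operand 0. As a rough illustration of that layout (the struct and field names are hypothetical; the expander itself works with raw offsets of POINTER_SIZE / BITS_PER_UNIT):

    struct pa_jmp_buf_sketch
    {
      void *saved_frame;    /* word 0: frame pointer (virtual_stack_vars)   */
      void *return_label;   /* word 1: label to branch to, loaded into %r1  */
      void *saved_stack;    /* word 2: stack pointer, used for the restore  */
    };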
+(define_expand "interspace_jump" + [(parallel + [(set (pc) (match_operand 0 "pmode_register_operand" "a")) + (clobber (match_dup 1))])] + "" + " +{ + operands[1] = gen_rtx_REG (word_mode, 2); +}") + +(define_insn "" + [(set (pc) (match_operand 0 "pmode_register_operand" "a")) + (clobber (reg:SI 2))] + "TARGET_PA_20 && !TARGET_64BIT" + "bve%* (%0)" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "" + [(set (pc) (match_operand 0 "pmode_register_operand" "a")) + (clobber (reg:SI 2))] + "TARGET_NO_SPACE_REGS && !TARGET_64BIT" + "be%* 0(%%sr4,%0)" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "" + [(set (pc) (match_operand 0 "pmode_register_operand" "a")) + (clobber (reg:SI 2))] + "!TARGET_64BIT" + "ldsid (%%sr0,%0),%%r2\;mtsp %%r2,%%sr0\;be%* 0(%%sr0,%0)" + [(set_attr "type" "branch") + (set_attr "length" "12")]) + +(define_insn "" + [(set (pc) (match_operand 0 "pmode_register_operand" "a")) + (clobber (reg:DI 2))] + "TARGET_64BIT" + "bve%* (%0)" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_expand "builtin_longjmp" + [(unspec_volatile [(match_operand 0 "register_operand" "r")] UNSPECV_LONGJMP)] + "" + " +{ + /* The elements of the buffer are, in order: */ + rtx fp = gen_rtx_MEM (Pmode, operands[0]); + rtx lab = gen_rtx_MEM (Pmode, plus_constant (operands[0], + POINTER_SIZE / BITS_PER_UNIT)); + rtx stack = gen_rtx_MEM (Pmode, plus_constant (operands[0], + (POINTER_SIZE * 2) / BITS_PER_UNIT)); + rtx pv = gen_rtx_REG (Pmode, 1); + + emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); + emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); + + /* Restore the frame pointer. The virtual_stack_vars_rtx is saved + instead of the hard_frame_pointer_rtx in the save area. We need + to adjust for the offset between these two values. */ + if (GET_CODE (fp) != REG) + fp = force_reg (Pmode, fp); + emit_move_insn (hard_frame_pointer_rtx, plus_constant (fp, -8)); + + /* This bit is the same as expand_builtin_longjmp. */ + emit_stack_restore (SAVE_NONLOCAL, stack); + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + /* Load the label we are jumping through into r1 so that we know + where to look for it when we get back to setjmp's function for + restoring the gp. */ + emit_move_insn (pv, lab); + + /* Prevent the insns above from being scheduled into the delay slot + of the interspace jump because the space register could change. */ + emit_insn (gen_blockage ()); + + emit_jump_insn (gen_interspace_jump (pv)); + emit_barrier (); + DONE; +}") + +;;; Operands 2 and 3 are assumed to be CONST_INTs. +(define_expand "extzv" + [(set (match_operand 0 "register_operand" "") + (zero_extract (match_operand 1 "register_operand" "") + (match_operand 2 "uint32_operand" "") + (match_operand 3 "uint32_operand" "")))] + "" + " +{ + HOST_WIDE_INT len = INTVAL (operands[2]); + HOST_WIDE_INT pos = INTVAL (operands[3]); + + /* PA extraction insns don't support zero length bitfields or fields + extending beyond the left or right-most bits. Also, we reject lengths + equal to a word as they are better handled by the move patterns. */ + if (len <= 0 || len >= BITS_PER_WORD || pos < 0 || pos + len > BITS_PER_WORD) + FAIL; + + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. 
*/ + if (!register_operand (operands[1], VOIDmode)) + FAIL; + + if (TARGET_64BIT) + emit_insn (gen_extzv_64 (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_extzv_32 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}") + +(define_insn "extzv_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "uint5_operand" "") + (match_operand:SI 3 "uint5_operand" "")))] + "" + "{extru|extrw,u} %1,%3+%2-1,%2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1) + (match_operand:SI 2 "register_operand" "q")))] + "" + "{vextru %1,1,%0|extrw,u %1,%%sar,1,%0}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "extzv_64" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "uint32_operand" "") + (match_operand:DI 3 "uint32_operand" "")))] + "TARGET_64BIT" + "extrd,u %1,%3+%2-1,%2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (match_operand:DI 2 "register_operand" "q")))] + "TARGET_64BIT" + "extrd,u %1,%%sar,1,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +;;; Operands 2 and 3 are assumed to be CONST_INTs. +(define_expand "extv" + [(set (match_operand 0 "register_operand" "") + (sign_extract (match_operand 1 "register_operand" "") + (match_operand 2 "uint32_operand" "") + (match_operand 3 "uint32_operand" "")))] + "" + " +{ + HOST_WIDE_INT len = INTVAL (operands[2]); + HOST_WIDE_INT pos = INTVAL (operands[3]); + + /* PA extraction insns don't support zero length bitfields or fields + extending beyond the left or right-most bits. Also, we reject lengths + equal to a word as they are better handled by the move patterns. */ + if (len <= 0 || len >= BITS_PER_WORD || pos < 0 || pos + len > BITS_PER_WORD) + FAIL; + + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. 
*/ + if (!register_operand (operands[1], VOIDmode)) + FAIL; + + if (TARGET_64BIT) + emit_insn (gen_extv_64 (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_extv_32 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}") + +(define_insn "extv_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "uint5_operand" "") + (match_operand:SI 3 "uint5_operand" "")))] + "" + "{extrs|extrw,s} %1,%3+%2-1,%2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1) + (match_operand:SI 2 "register_operand" "q")))] + "!TARGET_64BIT" + "{vextrs %1,1,%0|extrw,s %1,%%sar,1,%0}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "extv_64" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "uint32_operand" "") + (match_operand:DI 3 "uint32_operand" "")))] + "TARGET_64BIT" + "extrd,s %1,%3+%2-1,%2,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (match_operand:DI 2 "register_operand" "q")))] + "TARGET_64BIT" + "extrd,s %1,%%sar,1,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +;;; Operands 1 and 2 are assumed to be CONST_INTs. +(define_expand "insv" + [(set (zero_extract (match_operand 0 "register_operand" "") + (match_operand 1 "uint32_operand" "") + (match_operand 2 "uint32_operand" "")) + (match_operand 3 "arith5_operand" ""))] + "" + " +{ + HOST_WIDE_INT len = INTVAL (operands[1]); + HOST_WIDE_INT pos = INTVAL (operands[2]); + + /* PA insertion insns don't support zero length bitfields or fields + extending beyond the left or right-most bits. Also, we reject lengths + equal to a word as they are better handled by the move patterns. */ + if (len <= 0 || len >= BITS_PER_WORD || pos < 0 || pos + len > BITS_PER_WORD) + FAIL; + + /* From mips.md: insert_bit_field doesn't verify that our destination + matches the predicate, so check it again here. */ + if (!register_operand (operands[0], VOIDmode)) + FAIL; + + if (TARGET_64BIT) + emit_insn (gen_insv_64 (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_insv_32 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}") + +(define_insn "insv_32" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r,r") + (match_operand:SI 1 "uint5_operand" "") + (match_operand:SI 2 "uint5_operand" "")) + (match_operand:SI 3 "arith5_operand" "r,L"))] + "" + "@ + {dep|depw} %3,%2+%1-1,%1,%0 + {depi|depwi} %3,%2+%1-1,%1,%0" + [(set_attr "type" "shift,shift") + (set_attr "length" "4,4")]) + +;; Optimize insertion of const_int values of type 1...1xxxx. 
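The optimization in the next pattern rests on a small arithmetic fact: depwi (and depdi in the 64-bit variant further down) take a 5-bit sign-extended immediate, and a field value of the form 1...1xxxx is exactly the sign extension of its low five bits. A self-contained sketch of the test and of the rewritten immediate (the helper name and sample values are made up for illustration):

    #include <assert.h>

    /* Mirrors the insn condition: bit 4 of the value is set and every
       field bit above bit 3 is one.  */
    static int
    ones_then_4bits_p (long val, int len)
    {
      return (val & 0x10) != 0
             && (~val & ((1L << len) - 1) & ~0xfL) == 0;
    }

    int
    main (void)
    {
      long val = 0x1f5;   /* 1 1111 0101 -- a 9-bit field of form 1...1xxxx */
      int  len = 9;

      assert (ones_then_4bits_p (val, len));

      /* The immediate the output template substitutes for operand 3.  */
      long imm = (val & 0xf) - 0x10;
      assert (imm == -11);

      /* Sign-extending -11 back over the 9-bit field reproduces 0x1f5.  */
      assert ((imm & ((1L << len) - 1)) == 0x1f5);
      return 0;
    }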
+(define_insn "" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "uint5_operand" "") + (match_operand:SI 2 "uint5_operand" "")) + (match_operand:SI 3 "const_int_operand" ""))] + "(INTVAL (operands[3]) & 0x10) != 0 && + (~INTVAL (operands[3]) & ((1L << INTVAL (operands[1])) - 1) & ~0xf) == 0" + "* +{ + operands[3] = GEN_INT ((INTVAL (operands[3]) & 0xf) - 0x10); + return \"{depi|depwi} %3,%2+%1-1,%1,%0\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "insv_64" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r,r") + (match_operand:DI 1 "uint32_operand" "") + (match_operand:DI 2 "uint32_operand" "")) + (match_operand:DI 3 "arith32_operand" "r,L"))] + "TARGET_64BIT" + "@ + depd %3,%2+%1-1,%1,%0 + depdi %3,%2+%1-1,%1,%0" + [(set_attr "type" "shift,shift") + (set_attr "length" "4,4")]) + +;; Optimize insertion of const_int values of type 1...1xxxx. +(define_insn "" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "uint32_operand" "") + (match_operand:DI 2 "uint32_operand" "")) + (match_operand:DI 3 "const_int_operand" ""))] + "(INTVAL (operands[3]) & 0x10) != 0 + && TARGET_64BIT + && (~INTVAL (operands[3]) & ((1L << INTVAL (operands[1])) - 1) & ~0xf) == 0" + "* +{ + operands[3] = GEN_INT ((INTVAL (operands[3]) & 0xf) - 0x10); + return \"depdi %3,%2+%1-1,%1,%0\"; +}" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (const_int 32)))] + "TARGET_64BIT" + "depd,z %1,31,32,%0" + [(set_attr "type" "shift") + (set_attr "length" "4")]) + +;; This insn is used for some loop tests, typically loops reversed when +;; strength reduction is used. It is actually created when the instruction +;; combination phase combines the special loop test. Since this insn +;; is both a jump insn and has an output, it must deal with its own +;; reloads, hence the `m' constraints. The `!' constraints direct reload +;; to not choose the register alternatives in the event a reload is needed. +(define_insn "decrement_and_branch_until_zero" + [(set (pc) + (if_then_else + (match_operator 2 "comparison_operator" + [(plus:SI + (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*m") + (match_operand:SI 1 "int5_operand" "L,L,L")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1))) + (clobber (match_scratch:SI 4 "=X,r,r"))] + "" + "* return output_dbra (operands, insn, which_alternative); " +;; Do not expect to understand this the first time through. +[(set_attr "type" "cbranch,multi,multi") + (set (attr "length") + (if_then_else (eq_attr "alternative" "0") +;; Loop counter in register case +;; Short branch has length of 4 +;; Long branch has length of 8, 20, 24 or 28 + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)) + +;; Loop counter in FP reg case. +;; Extra goo to deal with additional reload insns. 
+ (if_then_else (eq_attr "alternative" "1") + (if_then_else (lt (match_dup 3) (pc)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 24)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 24) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 24)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 28) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 44) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 40)] + (const_int 48)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 24) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 28) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 44) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 40)] + (const_int 48))) + +;; Loop counter in memory case. +;; Extra goo to deal with additional reload insns. + (if_then_else (lt (match_dup 3) (pc)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 12)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 12) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 12)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 16) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 32) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 28)] + (const_int 36)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 12) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 16) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 32) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 28)] + (const_int 36))))))]) + +(define_insn "" + [(set (pc) + (if_then_else + (match_operator 2 "movb_comparison_operator" + [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (match_dup 1))] + "" +"* return output_movb (operands, insn, which_alternative, 0); " +;; Do not expect to understand this the first time through. +[(set_attr "type" "cbranch,multi,multi,multi") + (set (attr "length") + (if_then_else (eq_attr "alternative" "0") +;; Loop counter in register case +;; Short branch has length of 4 +;; Long branch has length of 8, 20, 24 or 28 + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)) + +;; Loop counter in FP reg case. +;; Extra goo to deal with additional reload insns. 
+ (if_then_else (eq_attr "alternative" "1") + (if_then_else (lt (match_dup 3) (pc)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 12)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 12) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 12)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 16) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 32) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 28)] + (const_int 36)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 12) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 16) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 32) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 28)] + (const_int 36))) + +;; Loop counter in memory or sar case. +;; Extra goo to deal with additional reload insns. + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 8) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 12) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 28) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 24)] + (const_int 32)))))]) + +;; Handle negated branch. +(define_insn "" + [(set (pc) + (if_then_else + (match_operator 2 "movb_comparison_operator" + [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) + (pc) + (label_ref (match_operand 3 "" "")))) + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (match_dup 1))] + "" +"* return output_movb (operands, insn, which_alternative, 1); " +;; Do not expect to understand this the first time through. +[(set_attr "type" "cbranch,multi,multi,multi") + (set (attr "length") + (if_then_else (eq_attr "alternative" "0") +;; Loop counter in register case +;; Short branch has length of 4 +;; Long branch has length of 8 + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)) + +;; Loop counter in FP reg case. +;; Extra goo to deal with additional reload insns. + (if_then_else (eq_attr "alternative" "1") + (if_then_else (lt (match_dup 3) (pc)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 12)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 12) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 12)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 16) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 32) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 28)] + (const_int 36)) + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 12) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 16) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 32) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 28)] + (const_int 36))) + +;; Loop counter in memory or SAR case. +;; Extra goo to deal with additional reload insns. 
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 8) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 12) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 28) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 24)] + (const_int 32)))))]) + +(define_insn "" + [(set (pc) (label_ref (match_operand 3 "" "" ))) + (set (match_operand:SI 0 "ireg_operand" "=r") + (plus:SI (match_operand:SI 1 "ireg_operand" "r") + (match_operand:SI 2 "ireg_or_int5_operand" "rL")))] + "(reload_completed && operands[0] == operands[1]) || operands[0] == operands[2]" + "* +{ + return output_parallel_addb (operands, insn); +}" +[(set_attr "type" "parallel_branch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) (label_ref (match_operand 2 "" "" ))) + (set (match_operand:SF 0 "ireg_operand" "=r") + (match_operand:SF 1 "ireg_or_int5_operand" "rL"))] + "reload_completed" + "* +{ + return output_parallel_movb (operands, insn); +}" +[(set_attr "type" "parallel_branch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) (label_ref (match_operand 2 "" "" ))) + (set (match_operand:SI 0 "ireg_operand" "=r") + (match_operand:SI 1 "ireg_or_int5_operand" "rL"))] + "reload_completed" + "* +{ + return output_parallel_movb (operands, insn); +}" +[(set_attr "type" "parallel_branch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) (label_ref (match_operand 2 "" "" ))) + (set (match_operand:HI 0 "ireg_operand" "=r") + (match_operand:HI 1 "ireg_or_int5_operand" "rL"))] + "reload_completed" + "* +{ + return output_parallel_movb (operands, insn); +}" +[(set_attr "type" "parallel_branch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (pc) (label_ref (match_operand 2 "" "" ))) + (set (match_operand:QI 0 "ireg_operand" "=r") + (match_operand:QI 1 "ireg_or_int5_operand" "rL"))] + "reload_completed" + "* +{ + return output_parallel_movb (operands, insn); +}" +[(set_attr 
"type" "parallel_branch") + (set (attr "length") + (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_12BIT_OFFSET)) + (const_int 4) + (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8)))) + (const_int MAX_17BIT_OFFSET)) + (const_int 8) + (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (const_int 24) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 20)] + (const_int 28)))]) + +(define_insn "" + [(set (match_operand 0 "register_operand" "=f") + (mult (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))) + (set (match_operand 3 "register_operand" "+f") + (plus (match_operand 4 "register_operand" "f") + (match_operand 5 "register_operand" "f")))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT + && reload_completed && fmpyaddoperands (operands)" + "* +{ + if (GET_MODE (operands[0]) == DFmode) + { + if (rtx_equal_p (operands[3], operands[5])) + return \"fmpyadd,dbl %1,%2,%0,%4,%3\"; + else + return \"fmpyadd,dbl %1,%2,%0,%5,%3\"; + } + else + { + if (rtx_equal_p (operands[3], operands[5])) + return \"fmpyadd,sgl %1,%2,%0,%4,%3\"; + else + return \"fmpyadd,sgl %1,%2,%0,%5,%3\"; + } +}" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand 3 "register_operand" "+f") + (plus (match_operand 4 "register_operand" "f") + (match_operand 5 "register_operand" "f"))) + (set (match_operand 0 "register_operand" "=f") + (mult (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT + && reload_completed && fmpyaddoperands (operands)" + "* +{ + if (GET_MODE (operands[0]) == DFmode) + { + if (rtx_equal_p (operands[3], operands[5])) + return \"fmpyadd,dbl %1,%2,%0,%4,%3\"; + else + return \"fmpyadd,dbl %1,%2,%0,%5,%3\"; + } + else + { + if (rtx_equal_p (operands[3], operands[5])) + return \"fmpyadd,sgl %1,%2,%0,%4,%3\"; + else + return \"fmpyadd,sgl %1,%2,%0,%5,%3\"; + } +}" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand 0 "register_operand" "=f") + (mult (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))) + (set (match_operand 3 "register_operand" "+f") + (minus (match_operand 4 "register_operand" "f") + (match_operand 5 "register_operand" "f")))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT + && reload_completed && fmpysuboperands (operands)" + "* +{ + if (GET_MODE (operands[0]) == DFmode) + return \"fmpysub,dbl %1,%2,%0,%5,%3\"; + else + return \"fmpysub,sgl %1,%2,%0,%5,%3\"; +}" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +(define_insn "" + [(set (match_operand 3 "register_operand" "+f") + (minus (match_operand 4 "register_operand" "f") + (match_operand 5 "register_operand" "f"))) + (set (match_operand 0 "register_operand" "=f") + (mult (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")))] + "TARGET_PA_11 && ! TARGET_SOFT_FLOAT + && reload_completed && fmpysuboperands (operands)" + "* +{ + if (GET_MODE (operands[0]) == DFmode) + return \"fmpysub,dbl %1,%2,%0,%5,%3\"; + else + return \"fmpysub,sgl %1,%2,%0,%5,%3\"; +}" + [(set_attr "type" "fpalu") + (set_attr "length" "4")]) + +;; The following two patterns are used by the trampoline code for nested +;; functions. They flush the I and D cache lines from the start address +;; (operand0) to the end address (operand1). No lines are flushed if the +;; end address is less than the start address (unsigned). 
+;; +;; Because the range of memory flushed is variable and the size of a MEM +;; can only be a CONST_INT, the patterns specify that they perform an +;; unspecified volatile operation on all memory. +;; +;; The address range for an icache flush must lie within a single +;; space on targets with non-equivalent space registers. +;; +;; Operand 0 contains the start address. +;; Operand 1 contains the end address. +;; Operand 2 contains the line length to use. +(define_insn "dcacheflush<P:mode>" + [(const_int 1) + (unspec_volatile [(mem:BLK (scratch))] UNSPECV_DCACHE) + (use (match_operand 0 "pmode_register_operand" "r")) + (use (match_operand 1 "pmode_register_operand" "r")) + (use (match_operand 2 "pmode_register_operand" "r")) + (clobber (match_scratch:P 3 "=&0"))] + "" + "cmpb,<dwc><<=,n %3,%1,.\;fdc,m %2(%3)\;sync" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "icacheflush<P:mode>" + [(const_int 2) + (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE) + (use (match_operand 0 "pmode_register_operand" "r")) + (use (match_operand 1 "pmode_register_operand" "r")) + (use (match_operand 2 "pmode_register_operand" "r")) + (clobber (match_operand 3 "pmode_register_operand" "=&r")) + (clobber (match_operand 4 "pmode_register_operand" "=&r")) + (clobber (match_scratch:P 5 "=&0"))] + "" + "mfsp %%sr0,%4\;ldsid (%5),%3\;mtsp %3,%%sr0\;cmpb,<dwc><<=,n %5,%1,.\;fic,m %2(%%sr0,%5)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop" + [(set_attr "type" "multi") + (set_attr "length" "52")]) + +;; An out-of-line prologue. +(define_insn "outline_prologue_call" + [(unspec_volatile [(const_int 0)] UNSPECV_OPC) + (clobber (reg:SI 31)) + (clobber (reg:SI 22)) + (clobber (reg:SI 21)) + (clobber (reg:SI 20)) + (clobber (reg:SI 19)) + (clobber (reg:SI 1))] + "" + "* +{ + + /* We need two different versions depending on whether or not we + need a frame pointer. Also note that we return to the instruction + immediately after the branch rather than two instructions after the + break as normally is the case. */ + if (frame_pointer_needed) + { + /* Must import the magic millicode routine(s). */ + output_asm_insn (\".IMPORT __outline_prologue_fp,MILLICODE\", NULL); + + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn (\"ldil L'__outline_prologue_fp,%%r31\", NULL); + output_asm_insn (\"ble,n R'__outline_prologue_fp(%%sr0,%%r31)\", + NULL); + } + else + output_asm_insn (\"{bl|b,l},n __outline_prologue_fp,%%r31\", NULL); + } + else + { + /* Must import the magic millicode routine(s). */ + output_asm_insn (\".IMPORT __outline_prologue,MILLICODE\", NULL); + + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn (\"ldil L'__outline_prologue,%%r31\", NULL); + output_asm_insn (\"ble,n R'__outline_prologue(%%sr0,%%r31)\", NULL); + } + else + output_asm_insn (\"{bl|b,l},n __outline_prologue,%%r31\", NULL); + } + return \"\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +;; An out-of-line epilogue. +(define_insn "outline_epilogue_call" + [(unspec_volatile [(const_int 1)] UNSPECV_OEC) + (use (reg:SI 29)) + (use (reg:SI 28)) + (clobber (reg:SI 31)) + (clobber (reg:SI 22)) + (clobber (reg:SI 21)) + (clobber (reg:SI 20)) + (clobber (reg:SI 19)) + (clobber (reg:SI 2)) + (clobber (reg:SI 1))] + "" + "* +{ + + /* We need two different versions depending on whether or not we + need a frame pointer. Also note that we return to the instruction + immediately after the branch rather than two instructions after the + break as normally is the case. 
*/ + if (frame_pointer_needed) + { + /* Must import the magic millicode routine. */ + output_asm_insn (\".IMPORT __outline_epilogue_fp,MILLICODE\", NULL); + + /* The out-of-line prologue will make sure we return to the right + instruction. */ + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn (\"ldil L'__outline_epilogue_fp,%%r31\", NULL); + output_asm_insn (\"ble,n R'__outline_epilogue_fp(%%sr0,%%r31)\", + NULL); + } + else + output_asm_insn (\"{bl|b,l},n __outline_epilogue_fp,%%r31\", NULL); + } + else + { + /* Must import the magic millicode routine. */ + output_asm_insn (\".IMPORT __outline_epilogue,MILLICODE\", NULL); + + /* The out-of-line prologue will make sure we return to the right + instruction. */ + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn (\"ldil L'__outline_epilogue,%%r31\", NULL); + output_asm_insn (\"ble,n R'__outline_epilogue(%%sr0,%%r31)\", NULL); + } + else + output_asm_insn (\"{bl|b,l},n __outline_epilogue,%%r31\", NULL); + } + return \"\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +;; Given a function pointer, canonicalize it so it can be +;; reliably compared to another function pointer. */ +(define_expand "canonicalize_funcptr_for_compare" + [(set (reg:SI 26) (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] UNSPEC_CFFC)) + (clobber (match_dup 2)) + (clobber (reg:SI 26)) + (clobber (reg:SI 22)) + (clobber (reg:SI 31))]) + (set (match_operand:SI 0 "register_operand" "") + (reg:SI 29))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + " +{ + if (TARGET_ELF32) + { + rtx canonicalize_funcptr_for_compare_libfunc + = init_one_libfunc (CANONICALIZE_FUNCPTR_FOR_COMPARE_LIBCALL); + + emit_library_call_value (canonicalize_funcptr_for_compare_libfunc, + operands[0], LCT_NORMAL, Pmode, + 1, operands[1], Pmode); + DONE; + } + + operands[2] = gen_reg_rtx (SImode); + if (GET_CODE (operands[1]) != REG) + { + rtx tmp = gen_reg_rtx (Pmode); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } +}") + +(define_insn "*$$sh_func_adrs" + [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] UNSPEC_CFFC)) + (clobber (match_operand:SI 0 "register_operand" "=a")) + (clobber (reg:SI 26)) + (clobber (reg:SI 22)) + (clobber (reg:SI 31))] + "!TARGET_64BIT" + "* +{ + int length = get_attr_length (insn); + rtx xoperands[2]; + + xoperands[0] = GEN_INT (length - 8); + xoperands[1] = GEN_INT (length - 16); + + /* Must import the magic millicode routine. */ + output_asm_insn (\".IMPORT $$sh_func_adrs,MILLICODE\", NULL); + + /* This is absolutely amazing. + + First, copy our input parameter into %r29 just in case we don't + need to call $$sh_func_adrs. */ + output_asm_insn (\"copy %%r26,%%r29\", NULL); + output_asm_insn (\"{extru|extrw,u} %%r26,31,2,%%r31\", NULL); + + /* Next, examine the low two bits in %r26, if they aren't 0x2, then + we use %r26 unchanged. */ + output_asm_insn (\"{comib|cmpib},<>,n 2,%%r31,.+%0\", xoperands); + output_asm_insn (\"ldi 4096,%%r31\", NULL); + + /* Next, compare %r26 with 4096, if %r26 is less than or equal to + 4096, then again we use %r26 unchanged. */ + output_asm_insn (\"{comb|cmpb},<<,n %%r26,%%r31,.+%1\", xoperands); + + /* Finally, call $$sh_func_adrs to extract the function's real add24. 
*/ + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, + \"$$sh_func_adrs\")); +}" + [(set_attr "type" "sh_func_adrs") + (set (attr "length") + (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 28)] + (plus (symbol_ref "attr_length_millicode_call (insn)") + (const_int 20))))]) + +;; On the PA, the PIC register is call clobbered, so it must +;; be saved & restored around calls by the caller. If the call +;; doesn't return normally (nonlocal goto, or an exception is +;; thrown), then the code at the exception handler label must +;; restore the PIC register. +(define_expand "exception_receiver" + [(const_int 4)] + "flag_pic" + " +{ + /* On the 64-bit port, we need a blockage because there is + confusion regarding the dependence of the restore on the + frame pointer. As a result, the frame pointer and pic + register restores sometimes are interchanged erroneously. */ + if (TARGET_64BIT) + emit_insn (gen_blockage ()); + /* Restore the PIC register using hppa_pic_save_rtx (). The + PIC register is not saved in the frame in 64-bit ABI. */ + emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ()); + emit_insn (gen_blockage ()); + DONE; +}") + +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "flag_pic" + " +{ + if (TARGET_64BIT) + emit_insn (gen_blockage ()); + /* Restore the PIC register. Hopefully, this will always be from + a stack slot. The only registers that are valid after a + builtin_longjmp are the stack and frame pointers. */ + emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ()); + emit_insn (gen_blockage ()); + DONE; +}") + +;; Allocate new stack space and update the saved stack pointer in the +;; frame marker. The HP C compilers also copy additional words in the +;; frame marker. The 64-bit compiler copies words at -48, -32 and -24. +;; The 32-bit compiler copies the word at -16 (Static Link). We +;; currently don't copy these values. +;; +;; Since the copy of the frame marker can't be done atomically, I +;; suspect that using it for unwind purposes may be somewhat unreliable. +;; The HP compilers appear to raise the stack and copy the frame +;; marker in a strict instruction sequence. This suggests that the +;; unwind library may check for an alloca sequence when ALLOCA_FRAME +;; is set in the callinfo data. We currently don't set ALLOCA_FRAME +;; as GAS doesn't support it, or try to keep the instructions emitted +;; here in strict sequence. +(define_expand "allocate_stack" + [(match_operand 0 "" "") + (match_operand 1 "" "")] + "" + " +{ + rtx addr; + + /* Since the stack grows upward, we need to store virtual_stack_dynamic_rtx + in operand 0 before adjusting the stack. */ + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + anti_adjust_stack (operands[1]); + if (TARGET_HPUX_UNWIND_LIBRARY) + { + addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, + GEN_INT (TARGET_64BIT ? 
-8 : -4)); + emit_move_insn (gen_rtx_MEM (word_mode, addr), hard_frame_pointer_rtx); + } + if (!TARGET_64BIT && flag_pic) + { + rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); + emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); + } + DONE; +}") + +(define_expand "prefetch" + [(match_operand 0 "address_operand" "") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_PA_20" +{ + operands[0] = copy_addr_to_reg (operands[0]); + emit_insn (gen_prefetch_20 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "prefetch_20" + [(prefetch (match_operand 0 "pmode_register_operand" "r") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "TARGET_PA_20" +{ + /* The SL cache-control completer indicates good spatial locality but + poor temporal locality. The ldw instruction with a target of general + register 0 prefetches a cache line for a read. The ldd instruction + prefetches a cache line for a write. */ + static const char * const instr[2][2] = { + { + "ldw,sl 0(%0),%%r0", + "ldd,sl 0(%0),%%r0" + }, + { + "ldw 0(%0),%%r0", + "ldd 0(%0),%%r0" + } + }; + int read_or_write = INTVAL (operands[1]) == 0 ? 0 : 1; + int locality = INTVAL (operands[2]) == 0 ? 0 : 1; + + return instr [locality][read_or_write]; +} + [(set_attr "type" "load") + (set_attr "length" "4")]) + +;; TLS Support +(define_insn "tgd_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")] UNSPEC_TLSGD)) + (clobber (reg:SI 1)) + (use (reg:SI 27))] + "" + "* +{ + return \"addil LR'%1-$tls_gdidx$,%%r27\;ldo RR'%1-$tls_gdidx$(%%r1),%0\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "tgd_load_pic" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")] UNSPEC_TLSGD_PIC)) + (clobber (reg:SI 1)) + (use (reg:SI 19))] + "" + "* +{ + return \"addil LT'%1-$tls_gdidx$,%%r19\;ldo RT'%1-$tls_gdidx$(%%r1),%0\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "tld_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] UNSPEC_TLSLDM)) + (clobber (reg:SI 1)) + (use (reg:SI 27))] + "" + "* +{ + return \"addil LR'%1-$tls_ldidx$,%%r27\;ldo RR'%1-$tls_ldidx$(%%r1),%0\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "tld_load_pic" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] UNSPEC_TLSLDM_PIC)) + (clobber (reg:SI 1)) + (use (reg:SI 19))] + "" + "* +{ + return \"addil LT'%1-$tls_ldidx$,%%r19\;ldo RT'%1-$tls_ldidx$(%%r1),%0\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "tld_offset_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 2 "register_operand" "r"))) + (clobber (reg:SI 1))] + "" + "* +{ + return \"addil LR'%1-$tls_dtpoff$,%2\;ldo RR'%1-$tls_dtpoff$(%%r1),%0\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "tp_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "" + "mfctl %%cr27,%0" + [(set_attr "type" "multi") + (set_attr "length" "4")]) + +(define_insn "tie_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 
"tie_symbolic_operand" "")] UNSPEC_TLSIE)) + (clobber (reg:SI 1)) + (use (reg:SI 27))] + "" + "* +{ + return \"addil LR'%1-$tls_ieoff$,%%r27\;ldw RR'%1-$tls_ieoff$(%%r1),%0\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "tie_load_pic" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "tie_symbolic_operand" "")] UNSPEC_TLSIE_PIC)) + (clobber (reg:SI 1)) + (use (reg:SI 19))] + "" + "* +{ + return \"addil LT'%1-$tls_ieoff$,%%r19\;ldw RT'%1-$tls_ieoff$(%%r1),%0\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "tle_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(match_operand 1 "tle_symbolic_operand" "")] + UNSPEC_TLSLE) + (match_operand:SI 2 "register_operand" "r"))) + (clobber (reg:SI 1))] + "" + "addil LR'%1-$tls_leoff$,%2\;ldo RR'%1-$tls_leoff$(%%r1),%0" + [(set_attr "type" "multi") + (set_attr "length" "8")]) diff --git a/gcc/config/pa/pa.opt b/gcc/config/pa/pa.opt new file mode 100644 index 000000000..6d10544b7 --- /dev/null +++ b/gcc/config/pa/pa.opt @@ -0,0 +1,118 @@ +; Options for the HP PA-RISC port of the compiler. + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +march=1.0 +Target RejectNegative +Generate PA1.0 code + +march=1.1 +Target RejectNegative +Generate PA1.1 code + +march=2.0 +Target RejectNegative +Generate PA2.0 code (requires binutils 2.10 or later) + +mbig-switch +Target Report Mask(BIG_SWITCH) +Generate code for huge switch statements + +mdisable-fpregs +Target Report Mask(DISABLE_FPREGS) +Disable FP regs + +mdisable-indexing +Target Report Mask(DISABLE_INDEXING) +Disable indexed addressing + +mfast-indirect-calls +Target Report Mask(FAST_INDIRECT_CALLS) +Generate fast indirect calls + +mfixed-range= +Target RejectNegative Joined +Specify range of registers to make fixed + +mgas +Target Report Mask(GAS) +Assume code will be assembled by GAS + +mjump-in-delay +Target Report Mask(JUMP_IN_DELAY) +Put jumps in call delay slots + +;; Not used by gcc +mlinker-opt +Target RejectNegative +Enable linker optimizations + +mlong-calls +Target Report Mask(LONG_CALLS) +Always generate long calls + +mlong-load-store +Target Report Mask(LONG_LOAD_STORE) +Emit long load/store sequences + +mnosnake +Target RejectNegative +Generate PA1.0 code + +mno-space-regs +Target RejectNegative Report Mask(NO_SPACE_REGS) +Disable space regs + +mpa-risc-1-0 +Target RejectNegative +Generate PA1.0 code + +mpa-risc-1-1 +Target RejectNegative Mask(PA_11) +Generate PA1.1 code + +mpa-risc-2-0 +Target RejectNegative Mask(PA_20) +Generate PA2.0 code (requires binutils 2.10 or later) + +mportable-runtime +Target Report Mask(PORTABLE_RUNTIME) +Use portable calling conventions + +mschedule= +Target RejectNegative Joined +Specify CPU for scheduling purposes. 
Valid arguments are 700, 7100, 7100LC, 7200, 7300, and 8000 + +msoft-float +Target Report Mask(SOFT_FLOAT) +Use software floating point + +msnake +Target RejectNegative +Generate PA1.1 code + +mspace-regs +Target RejectNegative Report InverseMask(NO_SPACE_REGS) +Do not disable space regs + +Mask(SIO) +;; Generate cpp defines for server I/O. + +Mask(GNU_LD) +;; Assume GNU linker by default diff --git a/gcc/config/pa/pa32-linux.h b/gcc/config/pa/pa32-linux.h new file mode 100644 index 000000000..8d80a2100 --- /dev/null +++ b/gcc/config/pa/pa32-linux.h @@ -0,0 +1,67 @@ +/* Definitions for PA_RISC with ELF-32 format + Copyright (C) 2000, 2002, 2004, 2006, 2007, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Turn off various SOM crap we don't want. */ +#undef TARGET_ELF32 +#define TARGET_ELF32 1 + +/* The libcall __canonicalize_funcptr_for_compare is referenced in + crtend.o and the reference isn't resolved in objects that don't + compare function pointers. Thus, we need to play games to provide + a reference in crtbegin.o. The rest of the define is the same + as that in crtstuff.c */ +#define CTOR_LIST_BEGIN \ + asm (".type __canonicalize_funcptr_for_compare,@function\n" \ +" .text\n" \ +" .word __canonicalize_funcptr_for_compare-$PIC_pcrel$0"); \ + STATIC func_ptr __CTOR_LIST__[1] \ + __attribute__ ((__used__, section(".ctors"), \ + aligned(sizeof(func_ptr)))) \ + = { (func_ptr) (-1) } + +/* This is a PIC version of CRT_CALL_STATIC_FUNCTION. The PIC + register has to be saved before the call and restored after + the call. We assume that register %r4 is available for this + purpose. The hack prevents GCC from deleting the restore. */ +#ifdef CRTSTUFFS_O +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ +static void __attribute__((__used__)) \ +call_ ## FUNC (void) \ +{ \ + asm (SECTION_OP); \ + asm volatile ("bl " #FUNC ",%%r2\n\t" \ + "copy %%r19,%%r4\n\t" \ + "copy %%r4,%%r19\n" \ + : \ + : \ + : "r1", "r2", "r4", "r20", "r21", \ + "r22", "r23", "r24", "r25", "r26", \ + "r27", "r28", "r29", "r31"); \ + asm (TEXT_SECTION_ASM_OP); \ +} +#endif + +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +#define MD_UNWIND_SUPPORT "config/pa/linux-unwind.h" diff --git a/gcc/config/pa/pa32-regs.h b/gcc/config/pa/pa32-regs.h new file mode 100644 index 000000000..9a1c06726 --- /dev/null +++ b/gcc/config/pa/pa32-regs.h @@ -0,0 +1,373 @@ +/* Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, + 2008, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + HP-PA 1.0 has 32 fullword registers and 16 floating point + registers. The floating point registers hold either word or double + word values. + + 16 additional registers are reserved. + + HP-PA 1.1 has 32 fullword registers and 32 floating point + registers. However, the floating point registers behave + differently: the left and right halves of registers are addressable + as 32-bit registers. So, we will set things up like the 68k which + has different fp units: define separate register sets for the 1.0 + and 1.1 fp units. */ + +#define FIRST_PSEUDO_REGISTER 90 /* 32 general regs + 56 fp regs + + + 1 shift reg + frame pointer */ + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + On the HP-PA, these are: + Reg 0 = 0 (hardware). However, 0 is used for condition code, + so is not fixed. + Reg 1 = ADDIL target/Temporary (hardware). + Reg 2 = Return Pointer + Reg 3 = Frame Pointer + Reg 4 = Frame Pointer (>8k varying frame with HP compilers only) + Reg 4-18 = Preserved Registers + Reg 19 = Linkage Table Register in HPUX 8.0 shared library scheme. + Reg 20-22 = Temporary Registers + Reg 23-26 = Temporary/Parameter Registers + Reg 27 = Global Data Pointer (hp) + Reg 28 = Temporary/Return Value register + Reg 29 = Temporary/Static Chain/Return Value register #2 + Reg 30 = stack pointer + Reg 31 = Temporary/Millicode Return Pointer (hp) + + Freg 0-3 = Status Registers -- Not known to the compiler. + Freg 4-7 = Arguments/Return Value + Freg 8-11 = Temporary Registers + Freg 12-15 = Preserved Registers + + Freg 16-31 = Reserved + + On the Snake, fp regs are + + Freg 0-3 = Status Registers -- Not known to the compiler. + Freg 4L-7R = Arguments/Return Value + Freg 8L-11R = Temporary Registers + Freg 12L-21R = Preserved Registers + Freg 22L-31R = Temporary Registers + +*/ + +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 1, 0, 0, 1, 0, \ + /* fp registers */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /* shift register and soft frame pointer */ \ + 0, 1} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. 
+ Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ + {1, 1, 1, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fp registers */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* shift register and soft frame pointer */ \ + 1, 1} + +/* Allocate the call used registers first. This should minimize + the number of registers that need to be saved (as call used + registers will generally not be allocated across a call). + + Experimentation has shown slightly better results by allocating + FP registers first. We allocate the caller-saved registers more + or less in reverse order to their allocation as arguments. + + FP registers are ordered so that all L registers are selected before + R registers. This works around a false dependency interlock on the + PA8000 when accessing the high and low parts of an FP register + independently. */ + +#define REG_ALLOC_ORDER \ + { \ + /* caller-saved fp regs. */ \ + 68, 70, 72, 74, 76, 78, 80, 82, \ + 84, 86, 40, 42, 44, 46, 38, 36, \ + 34, 32, \ + 69, 71, 73, 75, 77, 79, 81, 83, \ + 85, 87, 41, 43, 45, 47, 39, 37, \ + 35, 33, \ + /* caller-saved general regs. */ \ + 28, 19, 20, 21, 22, 31, 27, 29, \ + 23, 24, 25, 26, 2, \ + /* callee-saved fp regs. */ \ + 48, 50, 52, 54, 56, 58, 60, 62, \ + 64, 66, \ + 49, 51, 53, 55, 57, 59, 61, 63, \ + 65, 67, \ + /* callee-saved general regs. */ \ + 3, 4, 5, 6, 7, 8, 9, 10, \ + 11, 12, 13, 14, 15, 16, 17, 18, \ + /* special registers. */ \ + 1, 30, 0, 88, 89} + + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On the HP-PA, general registers are 32 bits wide. The floating + point registers are 64 bits wide. Snake fp regs are treated as + 32 bits wide since the left and right parts are independently + accessible. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (FP_REGNO_P (REGNO) \ + ? (!TARGET_PA_11 \ + ? COMPLEX_MODE_P (MODE) ? 2 : 1 \ + : (GET_MODE_SIZE (MODE) + 4 - 1) / 4) \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* There are no instructions that use DImode in PA 1.0, so we only + allow it in PA 1.1 and later. */ +#define VALID_FP_MODE_P(MODE) \ + ((MODE) == SFmode || (MODE) == DFmode \ + || (MODE) == SCmode || (MODE) == DCmode \ + || (MODE) == SImode || (TARGET_PA_11 && (MODE) == DImode)) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + + On the HP-PA, the cpu registers can hold any mode that fits in 32 bits. + For the 64-bit modes, we choose a set of non-overlapping general registers + that includes the incoming arguments and the return value. We specify a + set with no overlaps so that we don't have to specify that the destination + register is an early clobber in patterns using this mode. Except for the + return value, the starting registers are odd. For 128 and 256 bit modes, + we similarly specify non-overlapping sets of cpu registers. However, + there aren't any patterns defined for modes larger than 64 bits at the + moment. + + We limit the modes allowed in the floating point registers to the + set of modes used in the machine definition. In addition, we allow + the complex modes SCmode and DCmode. 
The real and imaginary parts + of complex modes are allocated to separate registers. This might + allow patterns to be defined in the future to operate on these values. + + The PA 2.0 architecture specifies that quad-precision floating-point + values should start on an even floating point register. Thus, we + choose non-overlapping sets of registers starting on even register + boundaries for large modes. However, there is currently no support + in the machine definition for modes larger than 64 bits. TFmode is + supported under HP-UX using libcalls. Since TFmode values are passed + by reference, they never need to be loaded into the floating-point + registers. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + ((REGNO) == 0 ? (MODE) == CCmode || (MODE) == CCFPmode \ + : (REGNO) == 88 ? SCALAR_INT_MODE_P (MODE) \ + : !TARGET_PA_11 && FP_REGNO_P (REGNO) \ + ? (VALID_FP_MODE_P (MODE) \ + && (GET_MODE_SIZE (MODE) <= 8 \ + || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 3) == 0))) \ + : FP_REGNO_P (REGNO) \ + ? (VALID_FP_MODE_P (MODE) \ + && (GET_MODE_SIZE (MODE) <= 4 \ + || (GET_MODE_SIZE (MODE) == 8 && ((REGNO) & 1) == 0) \ + || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 3) == 0) \ + || (GET_MODE_SIZE (MODE) == 32 && ((REGNO) & 7) == 0))) \ + : (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \ + || (GET_MODE_SIZE (MODE) == 2 * UNITS_PER_WORD \ + && ((((REGNO) & 1) == 1 && (REGNO) <= 25) || (REGNO) == 28)) \ + || (GET_MODE_SIZE (MODE) == 4 * UNITS_PER_WORD \ + && ((REGNO) & 3) == 3 && (REGNO) <= 23) \ + || (GET_MODE_SIZE (MODE) == 8 * UNITS_PER_WORD \ + && ((REGNO) & 7) == 3 && (REGNO) <= 19))) + +/* How to renumber registers for dbx and gdb. + + Registers 0 - 31 remain unchanged. + + Registers 32 - 87 are mapped to 72 - 127 + + Register 88 is mapped to 32. */ + +#define DBX_REGISTER_NUMBER(REGNO) \ + ((REGNO) <= 31 ? (REGNO) : \ + ((REGNO) <= 87 ? (REGNO) + 40 : 32)) + +/* We must not use the DBX register numbers for the DWARF 2 CFA column + numbers because that maps to numbers beyond FIRST_PSEUDO_REGISTER. + Instead use the identity mapping. */ +#define DWARF_FRAME_REGNUM(REG) REG + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + + /* The HP-PA has four kinds of registers: general regs, 1.0 fp regs, + 1.1 fp regs, and the high 1.1 fp regs, to which the operands of + fmpyadd and fmpysub are restricted. */ + +enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS, + GENERAL_OR_FP_REGS, SHIFT_REGS, ALL_REGS, LIM_REG_CLASSES}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. 
*/ + +#define REG_CLASS_NAMES \ + {"NO_REGS", "R1_REGS", "GENERAL_REGS", "FPUPPER_REGS", "FP_REGS", \ + "GENERAL_OR_FP_REGS", "SHIFT_REGS", "ALL_REGS"} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. Register 0, the "condition code" register, + is in no class. */ + +#define REG_CLASS_CONTENTS \ + {{0x00000000, 0x00000000, 0x00000000}, /* NO_REGS */ \ + {0x00000002, 0x00000000, 0x00000000}, /* R1_REGS */ \ + {0xfffffffe, 0x00000000, 0x02000000}, /* GENERAL_REGS */ \ + {0x00000000, 0xff000000, 0x00ffffff}, /* FPUPPER_REGS */ \ + {0x00000000, 0xffffffff, 0x00ffffff}, /* FP_REGS */ \ + {0xfffffffe, 0xffffffff, 0x02ffffff}, /* GENERAL_OR_FP_REGS */ \ + {0x00000000, 0x00000000, 0x01000000}, /* SHIFT_REGS */ \ + {0xfffffffe, 0xffffffff, 0x03ffffff}} /* ALL_REGS */ + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FP_REGS, SHIFT_REGS, LIM_REG_CLASSES \ +} + +/* Defines invalid mode changes. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + pa_cannot_change_mode_class (FROM, TO, CLASS) + +/* Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == 0 ? NO_REGS \ + : (REGNO) == 1 ? R1_REGS \ + : (REGNO) < 32 || (REGNO) == 89 ? GENERAL_REGS \ + : (REGNO) < 56 ? FP_REGS \ + : (REGNO) < 88 ? FPUPPER_REGS \ + : SHIFT_REGS) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((CLASS) == FP_REGS || (CLASS) == FPUPPER_REGS \ + ? (!TARGET_PA_11 \ + ? COMPLEX_MODE_P (MODE) ? 2 : 1 \ + : (GET_MODE_SIZE (MODE) + 4 - 1) / 4) \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* 1 if N is a possible register number for function argument passing. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + (((N) >= 23 && (N) <= 26) || (! TARGET_SOFT_FLOAT && (N) >= 32 && (N) <= 39)) + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). 
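+
+   For example (an illustrative reading of the table below): hard
+   register 0 is %r0, 32 is %fr4, 33 is %fr4R (the right half of
+   %fr4), 88 is the shift amount register SAR, and 89 is the soft
+   frame pointer sfp.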
*/ + +#define REGISTER_NAMES \ +{"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \ + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \ + "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", \ + "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31", \ + "%fr4", "%fr4R", "%fr5", "%fr5R", "%fr6", "%fr6R", "%fr7", "%fr7R", \ + "%fr8", "%fr8R", "%fr9", "%fr9R", "%fr10", "%fr10R", "%fr11", "%fr11R", \ + "%fr12", "%fr12R", "%fr13", "%fr13R", "%fr14", "%fr14R", "%fr15", "%fr15R", \ + "%fr16", "%fr16R", "%fr17", "%fr17R", "%fr18", "%fr18R", "%fr19", "%fr19R", \ + "%fr20", "%fr20R", "%fr21", "%fr21R", "%fr22", "%fr22R", "%fr23", "%fr23R", \ + "%fr24", "%fr24R", "%fr25", "%fr25R", "%fr26", "%fr26R", "%fr27", "%fr27R", \ + "%fr28", "%fr28R", "%fr29", "%fr29R", "%fr30", "%fr30R", "%fr31", "%fr31R", \ + "SAR", "sfp"} + +#define ADDITIONAL_REGISTER_NAMES \ +{{"%fr4L",32}, {"%fr5L",34}, {"%fr6L",36}, {"%fr7L",38}, \ + {"%fr8L",40}, {"%fr9L",42}, {"%fr10L",44}, {"%fr11L",46}, \ + {"%fr12L",48}, {"%fr13L",50}, {"%fr14L",52}, {"%fr15L",54}, \ + {"%fr16L",56}, {"%fr17L",58}, {"%fr18L",60}, {"%fr19L",62}, \ + {"%fr20L",64}, {"%fr21L",66}, {"%fr22L",68}, {"%fr23L",70}, \ + {"%fr24L",72}, {"%fr25L",74}, {"%fr26L",76}, {"%fr27L",78}, \ + {"%fr28L",80}, {"%fr29L",82}, {"%fr30L",84}, {"%fr31R",86}, \ + {"%cr11",88}} + +#define FP_SAVED_REG_LAST 66 +#define FP_SAVED_REG_FIRST 48 +#define FP_REG_STEP 2 +#define FP_REG_FIRST 32 +#define FP_REG_LAST 87 diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h new file mode 100644 index 000000000..cc9724a6e --- /dev/null +++ b/gcc/config/pa/pa64-hpux.h @@ -0,0 +1,442 @@ +/* Definitions of target machine for GNU compiler, for HPs running + HPUX using the 64bit runtime model. + Copyright (C) 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* We can debug dynamically linked executables on hpux11; we also + want dereferencing of a NULL pointer to cause a SEGV. Do not move + the "+Accept TypeMismatch" switch. We check for it in collect2 + to determine which init/fini is needed. 
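+
+   As an illustrative reading of the LINK_SPEC below: when linking a
+   shared library, HP ld is passed -b and GNU ld is passed -shared;
+   -static becomes "-a archive"; and -mlinker-opt passes -O to the
+   linker.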
*/ +#undef LINK_SPEC +#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD) +#define LINK_SPEC \ + "%{!shared:%{p:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{pg:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{!static:%{rdynamic:-E}}}\ + %{mhp-ld:+Accept TypeMismatch -z} %{mlinker-opt:-O}\ + %{!shared:-u main %{!nostdlib:%{!nodefaultlibs:-u __cxa_finalize}}}\ + %{static:-a archive} %{shared:%{mhp-ld:-b}%{!mhp-ld:-shared}}" +#else +#define LINK_SPEC \ + "%{!shared:%{p:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{pg:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\ + %nwarning: consider linking with '-static' as system libraries with\n\ + %n profiling support are only provided in archive format}}}\ + %{!shared:%{!static:%{rdynamic:-E}}}\ + %{!mgnu-ld:+Accept TypeMismatch -z} %{mlinker-opt:-O}\ + %{!shared:-u main %{!nostdlib:%{!nodefaultlibs:-u __cxa_finalize}}}\ + %{static:-a archive} %{shared:%{mgnu-ld:-shared}%{!mgnu-ld:-b}}" +#endif + +/* Profiling support is only provided in libc.a. However, libprof and + libgprof are only available in shared form on HP-UX 11.00. We use + the shared form if we are using the GNU linker or an archive form + isn't available. We also usually need to link with libdld and it's + only available in shared form. */ +#undef LIB_SPEC +#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD) +#define LIB_SPEC \ + "%{!shared:\ + %{!p:%{!pg:%{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\ + %{mt|pthread:-lpthread} -lc\ + %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ + %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ + %{p:%{!pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\ + -lprof %{static:-a archive}\ + %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\ + %{mt|pthread:-lpthread} -lc\ + %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ + %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ + %{pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\ + -lgprof %{static:-a archive}\ + %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\ + %{mt|pthread:-lpthread} -lc\ + %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ + %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ + %{shared:%{mt|pthread:-lpthread}}" +#else +#define LIB_SPEC \ + "%{!shared:\ + %{!p:%{!pg:%{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\ + %{mt|pthread:-lpthread} -lc\ + %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ + %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ + %{p:%{!pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\ + -lprof %{static:-a archive}\ + %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\ + %{mt|pthread:-lpthread} -lc\ + %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ + %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ + %{pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\ + -lgprof %{static:-a archive}\ + %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\ + %{mt|pthread:-lpthread} -lc\ + %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ + %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ + 
%{shared:%{mt|pthread:-lpthread}}" +#endif + +/* The libgcc_stub.a and milli.a libraries need to come last. */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC "\ + %G %L %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}\ + milli.a%s}}" + +/* Under hpux11, the normal location of the `ld' and `as' programs is the + /usr/ccs/bin directory. */ + +#ifndef CROSS_DIRECTORY_STRUCTURE +#undef MD_EXEC_PREFIX +#define MD_EXEC_PREFIX "/usr/ccs/bin" +#endif + +/* Default prefixes. */ + +#undef STANDARD_STARTFILE_PREFIX_1 +#define STANDARD_STARTFILE_PREFIX_1 "/lib/pa20_64/" + +#undef STANDARD_STARTFILE_PREFIX_2 +#define STANDARD_STARTFILE_PREFIX_2 "/usr/lib/pa20_64/" + +/* Under hpux11 the normal location of the various pa20_64 *crt*.o files + is the /usr/ccs/lib/pa20_64 directory. Some files may also be in the + /opt/langtools/lib/pa20_64 directory. */ + +#ifndef CROSS_DIRECTORY_STRUCTURE +#undef MD_STARTFILE_PREFIX +#define MD_STARTFILE_PREFIX "/usr/ccs/lib/pa20_64/" +#endif + +#ifndef CROSS_DIRECTORY_STRUCTURE +#undef MD_STARTFILE_PREFIX_1 +#define MD_STARTFILE_PREFIX_1 "/opt/langtools/lib/pa20_64/" +#endif + +/* This macro specifies the biggest alignment supported by the object + file format of this machine. + + The .align directive in the HP assembler allows alignments up to + 4096 bytes. However, the maximum alignment of a global common symbol + is 16 bytes using HP ld. Unfortunately, this macro doesn't provide + a method to check for common symbols. */ +#undef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT 32768 + +/* Due to limitations in the target structure, it isn't currently possible + to dynamically switch between the GNU and HP assemblers. */ +#undef TARGET_GAS + +/* Configure selects the standard ELFOS defines for use with GAS. */ +#ifdef USING_ELFOS_H + +/* We are using GAS. */ +#define TARGET_GAS 1 + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START pa_hpux64_gas_file_start + +/* This is how we output a null terminated string. */ +#undef STRING_ASM_OP +#define STRING_ASM_OP "\t.stringz\t" + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + +#define JCR_SECTION_NAME ".jcr" + +#define HP_INIT_ARRAY_SECTION_ASM_OP "\t.section\t.init" +#define GNU_INIT_ARRAY_SECTION_ASM_OP "\t.section\t.init_array" +#define HP_FINI_ARRAY_SECTION_ASM_OP "\t.section\t.fini" +#define GNU_FINI_ARRAY_SECTION_ASM_OP "\t.section\t.fini_array" + +/* We need to override the following two macros defined in elfos.h since + the .comm directive has a different syntax and it can't be used for + local common symbols. */ +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + pa_asm_output_aligned_common (FILE, NAME, SIZE, ALIGN) + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + pa_asm_output_aligned_local (FILE, NAME, SIZE, ALIGN) + +/* The define in pa.h doesn't work with the alias attribute. The + default is ok with the following define for GLOBAL_ASM_OP. */ +#undef TARGET_ASM_GLOBALIZE_LABEL + +/* This is how we globalize a label. */ +#define GLOBAL_ASM_OP "\t.globl\t" + +/* Hacked version from defaults.h that uses assemble_name_raw + instead of assemble_name. A symbol in a type directive that + isn't otherwise referenced doesn't cause the symbol to be + placed in the symbol table of the assembled object. 
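+
+   For example, assuming the usual ELF definitions of TYPE_ASM_OP
+   ("\t.type\t") and TYPE_OPERAND_FMT ("@%s"), the directive emitted
+   for a function "foo" would be:
+
+	.type	foo, @function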
*/ +#undef ASM_OUTPUT_TYPE_DIRECTIVE +#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE) \ +do { \ + fputs (TYPE_ASM_OP, STREAM); \ + assemble_name_raw (STREAM, NAME); \ + fputs (", ", STREAM); \ + fprintf (STREAM, TYPE_OPERAND_FMT, TYPE); \ + putc ('\n', STREAM); \ +} while (0) + +/* Hacked version from elfos.h that doesn't output a label. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ +do { \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ +} while (0) + +/* The type of external references must be set correctly for the + dynamic loader to work correctly. This is equivalent to the + HP assembler's .IMPORT directive but relates more directly to + ELF object file types. */ +#undef ASM_OUTPUT_EXTERNAL +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + pa_hpux_asm_output_external ((FILE), (DECL), (NAME)) +#define ASM_OUTPUT_EXTERNAL_REAL(FILE, DECL, NAME) \ +do { \ + if (FUNCTION_NAME_P (NAME)) \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + else \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + default_elf_asm_output_external (FILE, DECL, NAME); \ +} while (0) + +/* We need set the type for external libcalls. Also note that not all + libcall names are passed to targetm.encode_section_info (e.g., __main). + Thus, we also have to do the section encoding if it hasn't been done + already. */ +#undef ASM_OUTPUT_EXTERNAL_LIBCALL +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ +do { \ + if (!FUNCTION_NAME_P (XSTR (FUN, 0))) \ + hppa_encode_label (FUN); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, XSTR (FUN, 0), "function"); \ +} while (0) + +/* We need to use the HP style for internal labels. */ +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + sprintf (LABEL, "*%c$%s%04ld", (PREFIX)[0], (PREFIX) + 1, (long)(NUM)) + +#else /* USING_ELFOS_H */ + +/* We are not using GAS. */ +#define TARGET_GAS 0 + +/* HPUX 11 has the "new" HP assembler. It's still lousy, but it's a whole + lot better than the assembler shipped with older versions of hpux. + However, it doesn't support weak symbols and is a bad fit with ELF. */ +#undef NEW_HP_ASSEMBLER +#define NEW_HP_ASSEMBLER 1 + +/* It looks like DWARF2 will be the easiest debug format to handle on this + platform. */ +#define DWARF2_DEBUGGING_INFO 1 +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* This target uses the ELF object file format. */ +#define OBJECT_FORMAT_ELF + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START pa_hpux64_hpas_file_start + +#undef TEXT_SECTION_ASM_OP +#define TEXT_SECTION_ASM_OP "\t.SUBSPA $CODE$\n" +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.SUBSPA $LIT$\n" +#undef DATA_SECTION_ASM_OP +#define DATA_SECTION_ASM_OP "\t.SUBSPA $DATA$\n" +#undef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP "\t.SUBSPA $BSS$\n" + +/* We provide explicit defines for CTORS_SECTION_ASM_OP and + DTORS_SECTION_ASM_OP since we don't yet have support for + named sections with the HP assembler. 
*/ +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.SUBSPA \\.ctors,QUAD=1,ALIGN=8,ACCESS=31" +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.SUBSPA \\.dtors,QUAD=1,ALIGN=8,ACCESS=31" + +#define HP_INIT_ARRAY_SECTION_ASM_OP \ + "\t.SUBSPA \\.init,QUAD=1,ALIGN=8,ACCESS=31" +#define GNU_INIT_ARRAY_SECTION_ASM_OP \ + "\t.SUBSPA \\.init_array,QUAD=1,ALIGN=8,ACCESS=31" +#define HP_FINI_ARRAY_SECTION_ASM_OP \ + "\t.SUBSPA \\.fini,QUAD=1,ALIGN=8,ACCESS=31" +#define GNU_FINI_ARRAY_SECTION_ASM_OP \ + "\t.SUBSPA \\.fini_array,QUAD=1,ALIGN=8,ACCESS=31" + +#endif /* USING_ELFOS_H */ + +/* The following defines, used to run constructors and destructors with + the SOM linker under HP-UX 11, are not needed. */ +#undef HAS_INIT_SECTION +#undef LD_INIT_SWITCH +#undef LD_FINI_SWITCH + +/* The following STARTFILE_SPEC and ENDFILE_SPEC defines provide the + magic needed to run initializers and finalizers. */ +#undef STARTFILE_SPEC +#if TARGET_HPUX_11_11 +#define STARTFILE_SPEC \ + "%{!shared: %{!symbolic: crt0%O%s} %{munix=95:unix95.o%s} \ + %{!munix=93:%{!munix=95:unix98%O%s}}} %{static:crtbeginT%O%s} \ + %{!static:%{!shared:crtbegin%O%s} %{shared:crtbeginS%O%s}}" +#else +#define STARTFILE_SPEC \ + "%{!shared: %{!symbolic: crt0%O%s} %{munix=95:unix95%O%s}} \ + %{static:crtbeginT%O%s} %{!static:%{!shared:crtbegin%O%s} \ + %{shared:crtbeginS%O%s}}" +#endif +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}" + +/* Since HP uses the .init and .fini sections for array initializers + and finalizers, we need different defines for INIT_SECTION_ASM_OP + and FINI_SECTION_ASM_OP. With the implementation adopted below, + the sections are not actually used. However, we still must provide + defines to select the proper code path. */ +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP + +/* We are using array initializers and don't want calls in the INIT + and FINI sections. */ +#undef CRT_CALL_STATIC_FUNCTION +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) + +/* The init_priority attribute is not supported with HP ld. This could be + supported if collect2 was used with LD_INIT_SWITCH. Unfortunately, this + approach doesn't work with GNU ld since HP-UX doesn't support DT_INIT, + and therefore the -init and -fini GNU ld switches. */ +#undef SUPPORTS_INIT_PRIORITY +#define SUPPORTS_INIT_PRIORITY (TARGET_GNU_LD ? 1 : 0) + +/* We use DTOR_LIST_BEGIN to carry a bunch of hacks to allow us to use + the init and fini array sections with both the HP and GNU linkers. + The linkers setup the required dynamic entries in the dynamic segment + and the dynamic linker does the calls. This approach avoids using + collect2. + + The first hack is to implement __do_global_ctors_aux in crtbegin as + it needs to be the first entry in the init array so that it is called + last. HP got the order of the init array backwards. The DT_INIT_ARRAY + is supposed to be executed in the same order as the addresses appear in + the array. DT_FINI_ARRAY is supposed to be executed in the opposite + order. + + The second hack is a set of plabels to implement the effect of + CRT_CALL_STATIC_FUNCTION. HP-UX 11 only supports DI_INIT_ARRAY and + DT_FINI_ARRAY and they put the arrays in .init and .fini, rather than + in .init_array and .fini_array. The standard defines for .init and + .fini have the execute flag set. 
So, the assembler has to be hacked + to munge the standard flags for these sections to make them agree + with what the HP linker expects. With the GNU linker, we need to + used the .init_array and .fini_array sections. So, we set up for + both just in case. Once we have built the table, the linker does + the rest of the work. + + The order is significant. Placing __do_global_ctors_aux first in + the list, results in it being called last. User specified initializers, + either using the linker +init command or a plabel, run before the + initializers specified here. */ + +/* We need to add frame_dummy to the initializer list if EH_FRAME_SECTION_NAME + or JCR_SECTION_NAME is defined. */ +#if defined(EH_FRAME_SECTION_NAME) || defined(JCR_SECTION_NAME) +#define PA_INIT_FRAME_DUMMY_ASM_OP ".dword P%frame_dummy" +#else +#define PA_INIT_FRAME_DUMMY_ASM_OP "" +#endif + +/* The following hack sets up the .init, .init_array, .fini and + .fini_array sections. */ +#define PA_CRTBEGIN_HACK \ +asm (TEXT_SECTION_ASM_OP); \ +static void __attribute__((used)) \ +__do_global_ctors_aux (void) \ +{ \ + func_ptr *p = __CTOR_LIST__; \ + while (*(p + 1)) \ + p++; \ + for (; *p != (func_ptr) -1; p--) \ + (*p) (); \ +} \ + \ +asm (HP_INIT_ARRAY_SECTION_ASM_OP); \ +asm (".align 8"); \ +asm (".dword P%__do_global_ctors_aux"); \ +asm (PA_INIT_FRAME_DUMMY_ASM_OP); \ +asm (GNU_INIT_ARRAY_SECTION_ASM_OP); \ +asm (".align 8"); \ +asm (".dword P%__do_global_ctors_aux"); \ +asm (PA_INIT_FRAME_DUMMY_ASM_OP); \ +asm (HP_FINI_ARRAY_SECTION_ASM_OP); \ +asm (".align 8"); \ +asm (".dword P%__do_global_dtors_aux"); \ +asm (GNU_FINI_ARRAY_SECTION_ASM_OP); \ +asm (".align 8"); \ +asm (".dword P%__do_global_dtors_aux") + +/* The following two variants of DTOR_LIST_BEGIN are identical to those + in crtstuff.c except for the addition of the above crtbegin hack. */ +#ifdef DTORS_SECTION_ASM_OP +#define DTOR_LIST_BEGIN \ +asm (DTORS_SECTION_ASM_OP); \ +STATIC func_ptr __DTOR_LIST__[1] \ + __attribute__ ((aligned(sizeof(func_ptr)))) \ + = { (func_ptr) (-1) }; \ +PA_CRTBEGIN_HACK +#else +#define DTOR_LIST_BEGIN \ +STATIC func_ptr __DTOR_LIST__[1] \ + __attribute__ ((section(".dtors"), aligned(sizeof(func_ptr)))) \ + = { (func_ptr) (-1) }; \ +PA_CRTBEGIN_HACK +#endif + +/* If using HP ld do not call pxdb. Use size as a program that does nothing + and returns 0. /bin/true cannot be used because it is a script without + an interpreter. */ +#define INIT_ENVIRONMENT "LD_PXDB=/usr/ccs/bin/size" + +/* The HPUX dynamic linker objects to undefined weak symbols, so do + not use them in gthr-posix.h. */ +#define GTHREAD_USE_WEAK 0 + +/* We don't want undefined weak references to __register_frame_info, + __deregister_frame_info, _Jv_RegisterClasses and __cxa_finalize + introduced by crtbegin.o. The GNU linker only resolves weak + references if they appear in a shared library. Thus, it would be + impossible to create a static executable if the symbols were weak. + So, the best solution seems to be to make the symbols strong and + provide an archive library of empty stub functions. */ +#define TARGET_ATTRIBUTE_WEAK diff --git a/gcc/config/pa/pa64-hpux.opt b/gcc/config/pa/pa64-hpux.opt new file mode 100644 index 000000000..36b1c61ea --- /dev/null +++ b/gcc/config/pa/pa64-hpux.opt @@ -0,0 +1,27 @@ +; Options for the HP PA-RISC port of the compiler. + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. 
+; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +mgnu-ld +Target RejectNegative Mask(GNU_LD) MaskExists +Assume code will be linked by GNU ld + +mhp-ld +Target RejectNegative InverseMask(GNU_LD) +Assume code will be linked by HP ld diff --git a/gcc/config/pa/pa64-linux.h b/gcc/config/pa/pa64-linux.h new file mode 100644 index 000000000..174d7c54d --- /dev/null +++ b/gcc/config/pa/pa64-linux.h @@ -0,0 +1,64 @@ +/* Definitions for PA_RISC with ELF format on 64-bit Linux + Copyright (C) 1999, 2000, 2002, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#if 0 /* needs some work :-( */ +/* If defined, this macro specifies a table of register pairs used to + eliminate unneeded registers that point into the stack frame. */ + +#define ELIMINABLE_REGS \ +{ \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ +} + +/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It + specifies the initial difference between the specified pair of + registers. This macro must be defined if `ELIMINABLE_REGS' is + defined. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + do \ + { \ + int fsize; \ + \ + fsize = compute_frame_size (get_frame_size (), 0); \ + if ((TO) == FRAME_POINTER_REGNUM \ + && (FROM) == ARG_POINTER_REGNUM) \ + { \ + (OFFSET) = -16; \ + break; \ + } \ + \ + gcc_assert ((TO) == STACK_POINTER_REGNUM); \ + \ + switch (FROM) \ + { \ + case FRAME_POINTER_REGNUM: \ + (OFFSET) = - fsize; \ + break; \ + \ + case ARG_POINTER_REGNUM: \ + (OFFSET) = - fsize - 16; \ + break; \ + \ + default: \ + gcc_unreachable (); \ + } \ + } while (0) +#endif diff --git a/gcc/config/pa/pa64-regs.h b/gcc/config/pa/pa64-regs.h new file mode 100644 index 000000000..313577b62 --- /dev/null +++ b/gcc/config/pa/pa64-regs.h @@ -0,0 +1,294 @@ +/* Configuration for GCC-compiler for PA-RISC. + Copyright (C) 1999, 2000, 2003, 2004, 2007, 2008, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Standard register usage. + + It is safe to refer to actual register numbers in this file. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + HP-PA 2.0w has 32 fullword registers and 32 floating point + registers. However, the floating point registers behave + differently: the left and right halves of registers are addressable + as 32-bit registers. + + Due to limitations within GCC itself, we do not expose the left/right + half addressability when in wide mode. This is not a major performance + issue as using the halves independently triggers false dependency stalls + anyway. */ + +#define FIRST_PSEUDO_REGISTER 62 /* 32 general regs + 28 fp regs + + + 1 shift reg + frame pointer */ + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + On the HP-PA, these are: + Reg 0 = 0 (hardware). However, 0 is used for condition code, + so is not fixed. + Reg 1 = ADDIL target/Temporary (hardware). + Reg 2 = Return Pointer + Reg 3 = Frame Pointer + Reg 4 = Frame Pointer (>8k varying frame with HP compilers only) + Reg 4-18 = Preserved Registers + Reg 19 = Linkage Table Register in HPUX 8.0 shared library scheme. + Reg 20-22 = Temporary Registers + Reg 23-26 = Temporary/Parameter Registers + Reg 27 = Global Data Pointer (hp) + Reg 28 = Temporary/Return Value register + Reg 29 = Temporary/Static Chain/Return Value register #2 + Reg 30 = stack pointer + Reg 31 = Temporary/Millicode Return Pointer (hp) + + Freg 0-3 = Status Registers -- Not known to the compiler. + Freg 4-7 = Arguments/Return Value + Freg 8-11 = Temporary Registers + Freg 12-21 = Preserved Registers + Freg 22-31 = Temporary Registers + +*/ + +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 1, 0, 0, 1, 0, \ + /* fp registers */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + /* shift register and soft frame pointer */ \ + 0, 1} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ + {1, 1, 1, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fp registers */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + /* shift register and soft frame pointer */ \ + 1, 1} + +/* Allocate the call used registers first. This should minimize + the number of registers that need to be saved (as call used + registers will generally not be allocated across a call). 
+ + Experimentation has shown slightly better results by allocating + FP registers first. We allocate the caller-saved registers more + or less in reverse order to their allocation as arguments. */ + +#define REG_ALLOC_ORDER \ + { \ + /* caller-saved fp regs. */ \ + 50, 51, 52, 53, 54, 55, 56, 57, \ + 58, 59, 39, 38, 37, 36, 35, 34, \ + 33, 32, \ + /* caller-saved general regs. */ \ + 28, 31, 19, 20, 21, 22, 23, 24, \ + 25, 26, 29, 2, \ + /* callee-saved fp regs. */ \ + 40, 41, 42, 43, 44, 45, 46, 47, \ + 48, 49, \ + /* callee-saved general regs. */ \ + 3, 4, 5, 6, 7, 8, 9, 10, \ + 11, 12, 13, 14, 15, 16, 17, 18, \ + /* special registers. */ \ + 1, 27, 30, 0, 60, 61} + + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + For PA64, GPRs and FPRs hold 64 bits worth. We ignore the 32-bit + addressability of the FPRs and pretend each register holds precisely + WORD_SIZE bits. Note that SCmode values are placed in a single FPR. + Thus, any patterns defined to operate on these values would have to + use the 32-bit addressability of the FPR registers. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* These are the valid FP modes. */ +#define VALID_FP_MODE_P(MODE) \ + ((MODE) == SFmode || (MODE) == DFmode \ + || (MODE) == SCmode || (MODE) == DCmode \ + || (MODE) == SImode || (MODE) == DImode) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + On the HP-PA, the cpu registers can hold any mode. We + force this to be an even register if it cannot hold the full mode. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + ((REGNO) == 0 \ + ? (MODE) == CCmode || (MODE) == CCFPmode \ + : (REGNO) == 60 ? SCALAR_INT_MODE_P (MODE) \ + /* Make wide modes be in aligned registers. */ \ + : FP_REGNO_P (REGNO) \ + ? (VALID_FP_MODE_P (MODE) \ + && (GET_MODE_SIZE (MODE) <= 8 \ + || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 1) == 0) \ + || (GET_MODE_SIZE (MODE) == 32 && ((REGNO) & 3) == 0))) \ + : (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \ + || (GET_MODE_SIZE (MODE) == 2 * UNITS_PER_WORD \ + && ((((REGNO) & 1) == 1 && (REGNO) <= 25) || (REGNO) == 28)) \ + || (GET_MODE_SIZE (MODE) == 4 * UNITS_PER_WORD \ + && ((REGNO) & 3) == 3 && (REGNO) <= 23))) + +/* How to renumber registers for dbx and gdb. + + Registers 0 - 31 remain unchanged. + + Registers 32 - 59 are mapped to 72, 74, 76 ... + + Register 60 is mapped to 32. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + ((REGNO) <= 31 ? (REGNO) : ((REGNO) < 60 ? (REGNO - 32) * 2 + 72 : 32)) + +/* We must not use the DBX register numbers for the DWARF 2 CFA column + numbers because that maps to numbers beyond FIRST_PSEUDO_REGISTER. + Instead use the identity mapping. */ +#define DWARF_FRAME_REGNUM(REG) REG + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. 
+ + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + + /* The HP-PA has four kinds of registers: general regs, 1.0 fp regs, + 1.1 fp regs, and the high 1.1 fp regs, to which the operands of + fmpyadd and fmpysub are restricted. */ + +enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS, + GENERAL_OR_FP_REGS, SHIFT_REGS, ALL_REGS, LIM_REG_CLASSES}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ + {"NO_REGS", "R1_REGS", "GENERAL_REGS", "FPUPPER_REGS", "FP_REGS", \ + "GENERAL_OR_FP_REGS", "SHIFT_REGS", "ALL_REGS"} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. Register 0, the "condition code" register, + is in no class. */ + +#define REG_CLASS_CONTENTS \ + {{0x00000000, 0x00000000}, /* NO_REGS */ \ + {0x00000002, 0x00000000}, /* R1_REGS */ \ + {0xfffffffe, 0x20000000}, /* GENERAL_REGS */ \ + {0x00000000, 0x00000000}, /* FPUPPER_REGS */ \ + {0x00000000, 0x0fffffff}, /* FP_REGS */ \ + {0xfffffffe, 0x2fffffff}, /* GENERAL_OR_FP_REGS */ \ + {0x00000000, 0x10000000}, /* SHIFT_REGS */ \ + {0xfffffffe, 0x3fffffff}} /* ALL_REGS */ + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FP_REGS, SHIFT_REGS, LIM_REG_CLASSES \ +} + +/* Defines invalid mode changes. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + pa_cannot_change_mode_class (FROM, TO, CLASS) + +/* Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == 0 ? NO_REGS \ + : (REGNO) == 1 ? R1_REGS \ + : (REGNO) < 32 || (REGNO) == 61 ? GENERAL_REGS \ + : (REGNO) < 60 ? FP_REGS \ + : SHIFT_REGS) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* 1 if N is a possible register number for function argument passing. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + ((((N) >= 19) && (N) <= 26) \ + || (! TARGET_SOFT_FLOAT && (N) >= 32 && (N) <= 39)) + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). 
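+
+   For example (an illustrative reading of the table below): hard
+   register 32 is %fr4, 59 is %fr31, 60 is the shift amount register
+   SAR (also writable as %cr11), and 61 is the soft frame pointer sfp.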
*/ + +#define REGISTER_NAMES \ +{"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \ + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \ + "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", \ + "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31", \ + "%fr4", "%fr5", "%fr6", "%fr7", "%fr8", "%fr9", "%fr10", "%fr11", \ + "%fr12", "%fr13", "%fr14", "%fr15", "%fr16", "%fr17", "%fr18", "%fr19", \ + "%fr20", "%fr21", "%fr22", "%fr23", "%fr24", "%fr25", "%fr26", "%fr27", \ + "%fr28", "%fr29", "%fr30", "%fr31", "SAR", "sfp"} + +#define ADDITIONAL_REGISTER_NAMES \ + {{"%cr11",60}} + +#define FP_SAVED_REG_LAST 49 +#define FP_SAVED_REG_FIRST 40 +#define FP_REG_STEP 1 +#define FP_REG_FIRST 32 +#define FP_REG_LAST 59 diff --git a/gcc/config/pa/pa64-start.h b/gcc/config/pa/pa64-start.h new file mode 100644 index 000000000..9d7b19a37 --- /dev/null +++ b/gcc/config/pa/pa64-start.h @@ -0,0 +1,8 @@ +/* It is currently impossible to switch between PA32 and PA64 based on a + runtime compiler switch. So we might as well lose the overhead with + checking for TARGET_64BIT. */ +#define TARGET_64BIT 1 +#undef TARGET_PA_11 +#define TARGET_PA_11 1 +#undef TARGET_PA_20 +#define TARGET_PA_20 1 diff --git a/gcc/config/pa/predicates.md b/gcc/config/pa/predicates.md new file mode 100644 index 000000000..ff5dc1784 --- /dev/null +++ b/gcc/config/pa/predicates.md @@ -0,0 +1,524 @@ +;; Predicate definitions for HP PA-RISC. +;; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Return nonzero only if OP is a register of mode MODE, or +;; CONST0_RTX. + +(define_predicate "reg_or_0_operand" + (match_code "subreg,reg,const_int,const_double") +{ + return (op == CONST0_RTX (mode) || register_operand (op, mode)); +}) + +;; Return nonzero if OP is suitable for use in a call to a named +;; function. +;; +;; For 2.5 try to eliminate either call_operand_address or +;; function_label_operand, they perform very similar functions. + +(define_predicate "call_operand_address" + (match_code "label_ref,symbol_ref,const_int,const_double,const,high") +{ + return (GET_MODE (op) == word_mode + && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME); +}) + +;; Return 1 iff OP is an indexed memory operand. + +(define_predicate "indexed_memory_operand" + (match_code "subreg,mem") +{ + if (GET_MODE (op) != mode) + return 0; + + /* Before reload, a (SUBREG (MEM...)) forces reloading into a register. */ + if (reload_completed && GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode)) + return 0; + + op = XEXP (op, 0); + + return (memory_address_p (mode, op) && IS_INDEX_ADDR_P (op)); +}) + +;; Return 1 iff OP is a symbolic operand. +;; Note: an inline copy of this code is present in pa_secondary_reload. 
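+;;
+;; For example (an illustrative sketch, with "x" standing for any
+;; non-TLS symbol):
+;;
+;;   (symbol_ref "x")                                -- accepted
+;;   (label_ref ...)                                 -- accepted
+;;   (const (plus (symbol_ref "x") (const_int 4)))   -- accepted
+;;   (symbol_ref "x") with a TLS model               -- rejected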
+ +(define_predicate "symbolic_operand" + (match_code "symbol_ref,label_ref,const") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + return !SYMBOL_REF_TLS_MODEL (op); + case LABEL_REF: + return 1; + case CONST: + op = XEXP (op, 0); + return (GET_CODE (op) == PLUS + && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF + && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0))) + || GET_CODE (XEXP (op, 0)) == LABEL_REF) + && GET_CODE (XEXP (op, 1)) == CONST_INT); + default: + return 0; + } +}) + +;; Return truth value of statement that OP is a symbolic memory +;; operand of mode MODE. + +(define_predicate "symbolic_memory_operand" + (match_code "subreg,mem") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) != MEM) + return 0; + op = XEXP (op, 0); + return ((GET_CODE (op) == SYMBOL_REF && !SYMBOL_REF_TLS_MODEL (op)) + || GET_CODE (op) == CONST || GET_CODE (op) == HIGH + || GET_CODE (op) == LABEL_REF); +}) + +;; Return true if OP is a symbolic operand for the TLS Global Dynamic model. +(define_predicate "tgd_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_GLOBAL_DYNAMIC"))) + +;; Return true if OP is a symbolic operand for the TLS Local Dynamic model. +(define_predicate "tld_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_DYNAMIC"))) + +;; Return true if OP is a symbolic operand for the TLS Initial Exec model. +(define_predicate "tie_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC"))) + +;; Return true if OP is a symbolic operand for the TLS Local Exec model. +(define_predicate "tle_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC"))) + + +;; Return 1 if the operand is a register operand or a non-symbolic +;; memory operand after reload. This predicate is used for branch +;; patterns that internally handle register reloading. We need to +;; accept non-symbolic memory operands after reload to ensure that the +;; pattern is still valid if reload didn't find a hard register for +;; the operand. + +(define_predicate "reg_before_reload_operand" + (match_code "reg,mem") +{ + /* Don't accept a SUBREG since it will need a reload. */ + if (GET_CODE (op) == SUBREG) + return 0; + + if (register_operand (op, mode)) + return 1; + + if (reload_completed + && memory_operand (op, mode) + && !symbolic_memory_operand (op, mode)) + return 1; + + return 0; +}) + +;; Return 1 if the operand is either a register, zero, or a memory +;; operand that is not symbolic. + +(define_predicate "reg_or_0_or_nonsymb_mem_operand" + (match_code "subreg,reg,mem,const_int,const_double") +{ + if (register_operand (op, mode)) + return 1; + + if (op == CONST0_RTX (mode)) + return 1; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (GET_CODE (op) != MEM) + return 0; + + /* Until problems with management of the REG_POINTER flag are resolved, + we need to delay creating move insns with unscaled indexed addresses + until CSE is not expected. */ + if (!TARGET_NO_SPACE_REGS + && !cse_not_expected + && GET_CODE (XEXP (op, 0)) == PLUS + && REG_P (XEXP (XEXP (op, 0), 0)) + && REG_P (XEXP (XEXP (op, 0), 1))) + return 0; + + return (!symbolic_memory_operand (op, mode) + && memory_address_p (mode, XEXP (op, 0))); +}) + +;; Accept anything that can be used as a destination operand for a +;; move instruction. 
We don't accept indexed memory operands since +;; they are supported only for floating point stores. + +(define_predicate "move_dest_operand" + (match_code "subreg,reg,mem") +{ + if (register_operand (op, mode)) + return 1; + + if (GET_MODE (op) != mode) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode)) + return 0; + + op = XEXP (op, 0); + + return (memory_address_p (mode, op) + && !IS_INDEX_ADDR_P (op) + && !IS_LO_SUM_DLT_ADDR_P (op)); +}) + +;; Accept anything that can be used as a source operand for a move +;; instruction. + +(define_predicate "move_src_operand" + (match_code "subreg,reg,const_int,const_double,mem") +{ + if (register_operand (op, mode)) + return 1; + + if (op == CONST0_RTX (mode)) + return 1; + + if (GET_CODE (op) == CONST_INT) + return cint_ok_for_move (INTVAL (op)); + + if (GET_MODE (op) != mode) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (GET_CODE (op) != MEM) + return 0; + + /* Until problems with management of the REG_POINTER flag are resolved, + we need to delay creating move insns with unscaled indexed addresses + until CSE is not expected. */ + if (!TARGET_NO_SPACE_REGS + && !cse_not_expected + && GET_CODE (XEXP (op, 0)) == PLUS + && REG_P (XEXP (XEXP (op, 0), 0)) + && REG_P (XEXP (XEXP (op, 0), 1))) + return 0; + + return memory_address_p (mode, XEXP (op, 0)); +}) + +;; Accept REG and any CONST_INT that can be moved in one instruction +;; into a general register. + +(define_predicate "reg_or_cint_move_operand" + (match_code "subreg,reg,const_int") +{ + if (register_operand (op, mode)) + return 1; + + return (GET_CODE (op) == CONST_INT && cint_ok_for_move (INTVAL (op))); +}) + +;; TODO: Add a comment here. + +(define_predicate "pic_label_operand" + (match_code "label_ref,const") +{ + if (!flag_pic) + return 0; + + switch (GET_CODE (op)) + { + case LABEL_REF: + return 1; + case CONST: + op = XEXP (op, 0); + return (GET_CODE (XEXP (op, 0)) == LABEL_REF + && GET_CODE (XEXP (op, 1)) == CONST_INT); + default: + return 0; + } +}) + +;; TODO: Add a comment here. + +(define_predicate "fp_reg_operand" + (match_code "reg") +{ + return reg_renumber && FP_REG_P (op); +}) + +;; Return truth value of whether OP can be used as an operand in a +;; three operand arithmetic insn that accepts registers of mode MODE +;; or 14-bit signed integers. + +(define_predicate "arith_operand" + (match_code "subreg,reg,const_int") +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT && INT_14_BITS (op))); +}) + +;; Return truth value of whether OP can be used as an operand in a +;; three operand arithmetic insn that accepts registers of mode MODE +;; or 11-bit signed integers. + +(define_predicate "arith11_operand" + (match_code "subreg,reg,const_int") +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT && INT_11_BITS (op))); +}) + +;; A constant integer suitable for use in a PRE_MODIFY memory +;; reference. + +(define_predicate "pre_cint_operand" + (match_code "const_int") +{ + return (GET_CODE (op) == CONST_INT + && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10); +}) + +;; A constant integer suitable for use in a POST_MODIFY memory +;; reference. + +(define_predicate "post_cint_operand" + (match_code "const_int") +{ + return (GET_CODE (op) == CONST_INT + && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10); +}) + +;; TODO: Add a comment here. 
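+;; (Best-guess description, inferred from the test below rather than taken
+;; from the original sources: accept a register, or, roughly, a CONST_DOUBLE
+;; of the requested mode representing a value small enough for a 14-bit
+;; signed immediate.)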
+ +(define_predicate "arith_double_operand" + (match_code "subreg,reg,const_double") +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_DOUBLE + && GET_MODE (op) == mode + && VAL_14_BITS_P (CONST_DOUBLE_LOW (op)) + && ((CONST_DOUBLE_HIGH (op) >= 0) + == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0)))); +}) + +;; Return truth value of whether OP is an integer which fits the range +;; constraining immediate operands in three-address insns, or is an +;; integer register. + +(define_predicate "ireg_or_int5_operand" + (match_code "const_int,reg") +{ + return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op)) + || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32)); +}) + +;; Return truth value of whether OP is an integer which fits the range +;; constraining immediate operands in three-address insns. + +(define_predicate "int5_operand" + (match_code "const_int") +{ + return (GET_CODE (op) == CONST_INT && INT_5_BITS (op)); +}) + +;; Return truth value of whether OP is an integer which fits the range +;; constraining immediate operands in three-address insns. + +(define_predicate "uint5_operand" + (match_code "const_int") +{ + return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op)); +}) + +;; Return truth value of whether OP is an integer which fits the range +;; constraining immediate operands in three-address insns. + +(define_predicate "int11_operand" + (match_code "const_int") +{ + return (GET_CODE (op) == CONST_INT && INT_11_BITS (op)); +}) + +;; Return truth value of whether OP is an integer which fits the range +;; constraining immediate operands in three-address insns. + +(define_predicate "uint32_operand" + (match_code "const_int,const_double") +{ +#if HOST_BITS_PER_WIDE_INT > 32 + /* All allowed constants will fit a CONST_INT. */ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32)); +#else + return (GET_CODE (op) == CONST_INT + || (GET_CODE (op) == CONST_DOUBLE + && CONST_DOUBLE_HIGH (op) == 0)); +#endif +}) + +;; Return truth value of whether OP is an integer which fits the range +;; constraining immediate operands in three-address insns. + +(define_predicate "arith5_operand" + (match_code "subreg,reg,const_int") +{ + return register_operand (op, mode) || int5_operand (op, mode); +}) + +;; True iff depi or extru can be used to compute (reg & OP). + +(define_predicate "and_operand" + (match_code "subreg,reg,const_int") +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op)))); +}) + +;; True iff depi can be used to compute (reg | OP). + +(define_predicate "cint_ior_operand" + (and (match_code "const_int") + (match_test "ior_mask_p (INTVAL (op))"))) + +;; True iff OP can be used to compute (reg | OP). + +(define_predicate "reg_or_cint_ior_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "cint_ior_operand"))) + +;; True iff OP is a CONST_INT of the forms 0...0xxxx, 0...01...1xxxx, +;; or 1...1xxxx. Such values can be the left hand side x in (x << r), +;; using the zvdepi instruction. + +(define_predicate "lhs_lshift_cint_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT x; + if (GET_CODE (op) != CONST_INT) + return 0; + x = INTVAL (op) >> 4; + return (x & (x + 1)) == 0; +}) + +;; TODO: Add a comment here. + +(define_predicate "lhs_lshift_operand" + (match_code "subreg,reg,const_int") +{ + return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode); +}) + +;; TODO: Add a comment here. 
+ +(define_predicate "arith32_operand" + (match_code "subreg,reg,const_int") +{ + return register_operand (op, mode) || GET_CODE (op) == CONST_INT; +}) + +;; TODO: Add a comment here. + +(define_predicate "pc_or_label_operand" + (match_code "pc,label_ref") +{ + return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF); +}) + +;; TODO: Add a comment here. + +(define_predicate "plus_xor_ior_operator" + (match_code "plus,xor,ior") +{ + return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR + || GET_CODE (op) == IOR); +}) + +;; Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are +;; the valid constant for shadd instructions. + +(define_predicate "shadd_operand" + (match_code "const_int") +{ + return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op))); +}) + +;; TODO: Add a comment here. + +(define_predicate "div_operand" + (match_code "reg,const_int") +{ + return (mode == SImode + && ((GET_CODE (op) == REG && REGNO (op) == 25) + || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0 + && INTVAL (op) < 16 && magic_milli[INTVAL (op)]))); +}) + +;; Return nonzero if OP is an integer register, else return zero. + +(define_predicate "ireg_operand" + (match_code "reg") +{ + return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32); +}) + +;; Return 1 if this is a comparison operator. This allows the use of +;; MATCH_OPERATOR to recognize all the branch insns. + +(define_predicate "cmpib_comparison_operator" + (match_code "eq,ne,lt,le,leu,gt,gtu,ge") +{ + return ((mode == VOIDmode || GET_MODE (op) == mode) + && (GET_CODE (op) == EQ + || GET_CODE (op) == NE + || GET_CODE (op) == GT + || GET_CODE (op) == GTU + || GET_CODE (op) == GE + || GET_CODE (op) == LT + || GET_CODE (op) == LE + || GET_CODE (op) == LEU)); +}) + +;; Return 1 if OP is an operator suitable for use in a movb +;; instruction. + +(define_predicate "movb_comparison_operator" + (match_code "eq,ne,lt,ge") +{ + return (GET_CODE (op) == EQ || GET_CODE (op) == NE + || GET_CODE (op) == LT || GET_CODE (op) == GE); +}) diff --git a/gcc/config/pa/quadlib.c b/gcc/config/pa/quadlib.c new file mode 100644 index 000000000..2c1160015 --- /dev/null +++ b/gcc/config/pa/quadlib.c @@ -0,0 +1,245 @@ +/* Subroutines for long double support. + Copyright (C) 2000, 2002, 2004, 2005, 2006, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* HPUX TFmode compare requires a library call to _U_Qfcmp. It takes + a magic number as its third argument which indicates what to do. + The return value is an integer to be compared against zero. 
The + comparison conditions are the same as those listed in Table 8-12 + of the PA-RISC 2.0 Architecture book for the fcmp instruction. */ + +/* Raise FP_INVALID on SNaN as a side effect. */ +#define QCMP_INV 1 + +/* Comparison relations. */ +#define QCMP_UNORD 2 +#define QCMP_EQ 4 +#define QCMP_LT 8 +#define QCMP_GT 16 + +int _U_Qfcmp (long double a, long double b, int); +long _U_Qfcnvfxt_quad_to_sgl (long double); + +int _U_Qfeq (long double, long double); +int _U_Qfne (long double, long double); +int _U_Qfgt (long double, long double); +int _U_Qfge (long double, long double); +int _U_Qflt (long double, long double); +int _U_Qfle (long double, long double); +int _U_Qfltgt (long double, long double); +int _U_Qfunle (long double, long double); +int _U_Qfunlt (long double, long double); +int _U_Qfunge (long double, long double); +int _U_Qfungt (long double, long double); +int _U_Qfuneq (long double, long double); +int _U_Qfunord (long double, long double); +int _U_Qford (long double, long double); + +int _U_Qfcomp (long double, long double); + +long double _U_Qfneg (long double); +long double _U_Qfcopysign (long double, long double); + +#ifdef __LP64__ +int __U_Qfcnvfxt_quad_to_sgl (long double); +#endif +unsigned int _U_Qfcnvfxt_quad_to_usgl(long double); +long double _U_Qfcnvxf_usgl_to_quad (unsigned int); +unsigned long long _U_Qfcnvfxt_quad_to_udbl(long double); +long double _U_Qfcnvxf_udbl_to_quad (unsigned long long); + +int +_U_Qfeq (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_EQ) != 0); +} + +int +_U_Qfne (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_EQ) == 0); +} + +int +_U_Qfgt (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_GT) != 0); +} + +int +_U_Qfge (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_EQ | QCMP_GT) != 0); +} + +int +_U_Qflt (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_LT) != 0); +} + +int +_U_Qfle (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_EQ | QCMP_LT) != 0); +} + +int +_U_Qfltgt (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_LT | QCMP_GT) != 0); +} + +int +_U_Qfunle (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_EQ | QCMP_LT) != 0); +} + +int +_U_Qfunlt (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_LT) != 0); +} + +int +_U_Qfunge (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_EQ | QCMP_GT) != 0); +} + +int +_U_Qfungt (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_GT) != 0); +} + +int +_U_Qfuneq (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_EQ) != 0); +} + +int +_U_Qfunord (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD) != 0); +} + +int +_U_Qford (long double a, long double b) +{ + return (_U_Qfcmp (a, b, QCMP_INV | QCMP_EQ | QCMP_LT | QCMP_GT) != 0); +} + +int +_U_Qfcomp (long double a, long double b) +{ + if (_U_Qfcmp (a, b, QCMP_EQ) == 0) + return 0; + + return (_U_Qfcmp (a, b, QCMP_UNORD | QCMP_EQ | QCMP_GT) != 0 ? 1 : -1); +} + +/* Negate long double A. */ +long double +_U_Qfneg (long double a) +{ + union + { + long double ld; + int i[4]; + } u; + + u.ld = a; + u.i[0] ^= 0x80000000; + return u.ld; +} + +/* Return long double A with sign changed to sign of long double B. 
*/ +long double +_U_Qfcopysign (long double a, long double b) +{ + union + { + long double ld; + int i[4]; + } ua, ub; + + ua.ld = a; + ub.ld = b; + ua.i[0] &= 0x7fffffff; + ua.i[0] |= (0x80000000 & ub.i[0]); + return ua.ld; +} + +#ifdef __LP64__ +/* This routine is only necessary for the PA64 port; for reasons unknown + _U_Qfcnvfxt_quad_to_sgl returns the integer in the high 32bits of the + return value. Ugh. */ +int +__U_Qfcnvfxt_quad_to_sgl (long double a) +{ + return _U_Qfcnvfxt_quad_to_sgl (a) >> 32; +} +#endif + +/* HP only has signed conversion in the C library, so need to synthesize + unsigned versions. */ +unsigned int +_U_Qfcnvfxt_quad_to_usgl (long double a) +{ + extern long long _U_Qfcnvfxt_quad_to_dbl (long double a); + return (unsigned int) _U_Qfcnvfxt_quad_to_dbl (a); +} + +long double +_U_Qfcnvxf_usgl_to_quad (unsigned int a) +{ + extern long double _U_Qfcnvxf_dbl_to_quad (long long); + return _U_Qfcnvxf_dbl_to_quad ((long long) a); +} + +typedef union { + unsigned long long u[2]; + long double d[1]; +} quad_type; + +unsigned long long +_U_Qfcnvfxt_quad_to_udbl (long double a) +{ + extern quad_type _U_Qfcnvfxt_quad_to_quad (long double a); + quad_type u; + u = _U_Qfcnvfxt_quad_to_quad(a); + return u.u[1]; +} + +long double +_U_Qfcnvxf_udbl_to_quad (unsigned long long a) +{ + extern long double _U_Qfcnvxf_quad_to_quad (quad_type a); + quad_type u; + u.u[0] = 0; + u.u[1] = a; + return _U_Qfcnvxf_quad_to_quad (u); +} diff --git a/gcc/config/pa/som.h b/gcc/config/pa/som.h new file mode 100644 index 000000000..73095e5e0 --- /dev/null +++ b/gcc/config/pa/som.h @@ -0,0 +1,341 @@ +/* Definitions for SOM assembler support. + Copyright (C) 1999, 2001, 2002, 2003, 2004, 2005, 2007, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* So we can conditionalize small amounts of code in pa.c or pa.md. */ +#undef TARGET_SOM +#define TARGET_SOM 1 + +/* We do not use BINCL stabs in SOM. + ??? If it does not hurt, we probably should to avoid useless divergence + from other embedded stabs implementations. */ +#undef DBX_USE_BINCL + +#define DBX_LINES_FUNCTION_RELATIVE 1 + +/* gdb needs a null N_SO at the end of each file for scattered loading. */ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +/* HPUX has a program 'chatr' to list the dependencies of dynamically + linked executables and shared libraries. */ +#define LDD_SUFFIX "chatr" +/* Look for lines like "dynamic /usr/lib/X11R5/libX11.sl" + or "static /usr/lib/X11R5/libX11.sl". + + HPUX 10.20 also has lines like "static branch prediction ..." + so we filter that out explicitly. + + We also try to bound our search for libraries with marker + lines. What a pain. 
*/ +#define PARSE_LDD_OUTPUT(PTR) \ +do { \ + static int in_shlib_list = 0; \ + while (*PTR == ' ') PTR++; \ + if (strncmp (PTR, "shared library list:", \ + sizeof ("shared library list:") - 1) == 0) \ + { \ + PTR = 0; \ + in_shlib_list = 1; \ + } \ + else if (strncmp (PTR, "shared library binding:", \ + sizeof ("shared library binding:") - 1) == 0)\ + { \ + PTR = 0; \ + in_shlib_list = 0; \ + } \ + else if (strncmp (PTR, "static branch prediction disabled", \ + sizeof ("static branch prediction disabled") - 1) == 0)\ + { \ + PTR = 0; \ + in_shlib_list = 0; \ + } \ + else if (in_shlib_list \ + && strncmp (PTR, "dynamic", sizeof ("dynamic") - 1) == 0) \ + { \ + PTR += sizeof ("dynamic") - 1; \ + while (*p == ' ') PTR++; \ + } \ + else if (in_shlib_list \ + && strncmp (PTR, "static", sizeof ("static") - 1) == 0) \ + { \ + PTR += sizeof ("static") - 1; \ + while (*p == ' ') PTR++; \ + } \ + else \ + PTR = 0; \ +} while (0) + +/* Output the label for a function definition. */ +#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED +#define ASM_DOUBLE_ARG_DESCRIPTORS(FILE, ARG0, ARG1) \ + do { fprintf (FILE, ",ARGW%d=FR", (ARG0)); \ + fprintf (FILE, ",ARGW%d=FU", (ARG1));} while (0) +#define DFMODE_RETURN_STRING ",RTNVAL=FU" +#define SFMODE_RETURN_STRING ",RTNVAL=FR" +#else +#define ASM_DOUBLE_ARG_DESCRIPTORS(FILE, ARG0, ARG1) \ + do { fprintf (FILE, ",ARGW%d=FU", (ARG0)); \ + fprintf (FILE, ",ARGW%d=FR", (ARG1));} while (0) +#define DFMODE_RETURN_STRING ",RTNVAL=FR" +#define SFMODE_RETURN_STRING ",RTNVAL=FU" +#endif + + +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do { tree fntype = TREE_TYPE (TREE_TYPE (DECL)); \ + tree tree_type = TREE_TYPE (DECL); \ + tree parm; \ + int i; \ + if (TREE_PUBLIC (DECL) || TARGET_GAS) \ + { \ + if (TREE_PUBLIC (DECL)) \ + { \ + fputs ("\t.EXPORT ", FILE); \ + assemble_name (FILE, NAME); \ + fputs (",ENTRY,PRIV_LEV=3", FILE); \ + } \ + else \ + { \ + fputs ("\t.PARAM ", FILE); \ + assemble_name (FILE, NAME); \ + fputs (",PRIV_LEV=3", FILE); \ + } \ + for (parm = DECL_ARGUMENTS (DECL), i = 0; parm && i < 4; \ + parm = DECL_CHAIN (parm)) \ + { \ + if (TYPE_MODE (DECL_ARG_TYPE (parm)) == SFmode \ + && ! TARGET_SOFT_FLOAT) \ + fprintf (FILE, ",ARGW%d=FR", i++); \ + else if (TYPE_MODE (DECL_ARG_TYPE (parm)) == DFmode \ + && ! TARGET_SOFT_FLOAT) \ + { \ + if (i <= 2) \ + { \ + if (i == 1) i++; \ + ASM_DOUBLE_ARG_DESCRIPTORS (FILE, i++, i++); \ + } \ + else \ + break; \ + } \ + else \ + { \ + int arg_size = \ + FUNCTION_ARG_SIZE (TYPE_MODE (DECL_ARG_TYPE (parm)),\ + DECL_ARG_TYPE (parm)); \ + /* Passing structs by invisible reference uses \ + one general register. */ \ + if (arg_size > 2 \ + || TREE_ADDRESSABLE (DECL_ARG_TYPE (parm))) \ + arg_size = 1; \ + if (arg_size == 2 && i <= 2) \ + { \ + if (i == 1) i++; \ + fprintf (FILE, ",ARGW%d=GR", i++); \ + fprintf (FILE, ",ARGW%d=GR", i++); \ + } \ + else if (arg_size == 1) \ + fprintf (FILE, ",ARGW%d=GR", i++); \ + else \ + i += arg_size; \ + } \ + } \ + /* anonymous args */ \ + if (stdarg_p (tree_type)) \ + { \ + for (; i < 4; i++) \ + fprintf (FILE, ",ARGW%d=GR", i); \ + } \ + if (TYPE_MODE (fntype) == DFmode && ! TARGET_SOFT_FLOAT) \ + fputs (DFMODE_RETURN_STRING, FILE); \ + else if (TYPE_MODE (fntype) == SFmode && ! 
TARGET_SOFT_FLOAT) \ + fputs (SFMODE_RETURN_STRING, FILE); \ + else if (fntype != void_type_node) \ + fputs (",RTNVAL=GR", FILE); \ + fputs ("\n", FILE); \ + }} while (0) + +#define TARGET_ASM_FILE_START pa_som_file_start +#define TARGET_ASM_INIT_SECTIONS pa_som_asm_init_sections + +/* String to output before writable data. */ +#define DATA_SECTION_ASM_OP "\t.SPACE $PRIVATE$\n\t.SUBSPA $DATA$\n" + +/* String to output before uninitialized data. */ +#define BSS_SECTION_ASM_OP "\t.SPACE $PRIVATE$\n\t.SUBSPA $BSS$\n" + +/* This is how to output a command to make the user-level label + named NAME defined for reference from other files. We use + assemble_name_raw instead of assemble_name since a symbol in + a .IMPORT directive that isn't otherwise referenced is not + placed in the symbol table of the assembled object. + + Failure to import a function reference can cause the HP linker + to segmentation fault! + + Note that the SOM based tools need the symbol imported as a + CODE symbol, while the ELF based tools require the symbol to + be imported as an ENTRY symbol. */ + +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + pa_hpux_asm_output_external ((FILE), (DECL), (NAME)) +#define ASM_OUTPUT_EXTERNAL_REAL(FILE, DECL, NAME) \ + do { fputs ("\t.IMPORT ", FILE); \ + assemble_name_raw (FILE, NAME); \ + if (FUNCTION_NAME_P (NAME)) \ + fputs (",CODE\n", FILE); \ + else \ + fputs (",DATA\n", FILE); \ + } while (0) + +/* The bogus HP assembler requires ALL external references to be + "imported", even library calls. They look a bit different, so + here's this macro. + + Also note not all libcall names are passed to pa_encode_section_info + (__main for example). To make sure all libcall names have section + info recorded in them, we do it here. + + We must also ensure that a libcall that has been previously + exported is not subsequently imported since the HP assembler may + change the type from an ENTRY to a CODE symbol. This would make + the symbol local. We are forced to use the identifier node + associated with the real assembler name for this check as the + symbol_ref available in ASM_DECLARE_FUNCTION_NAME is not the + same as the one used here. As a result, we can't use flags + in the symbol_ref for this check. The identifier check assumes + assemble_external_libcall is called before the symbol is used. */ + +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, RTL) \ + do { const char *name; \ + tree id; \ + \ + if (!function_label_operand (RTL, VOIDmode)) \ + hppa_encode_label (RTL); \ + \ + name = targetm.strip_name_encoding (XSTR ((RTL), 0)); \ + id = maybe_get_identifier (name); \ + if (!id || !TREE_SYMBOL_REFERENCED (id)) \ + { \ + fputs ("\t.IMPORT ", FILE); \ + assemble_name_raw (FILE, XSTR ((RTL), 0)); \ + fputs (",CODE\n", FILE); \ + } \ + } while (0) + +/* We want __gcc_plt_call to appear in every program built by + gcc, so we make a reference to it out of __main. + We use the asm statement to fool the optimizer into not + removing the dead (but important) initialization of + REFERENCE. */ + +#define DO_GLOBAL_DTORS_BODY \ +do { \ + extern void __gcc_plt_call (void); \ + void (*reference)(void) = &__gcc_plt_call; \ + func_ptr *p; \ + __asm__ ("" : : "r" (reference)); \ + for (p = __DTOR_LIST__ + 1; *p; ) \ + (*p++) (); \ +} while (0) + +/* This macro specifies the biggest alignment supported by the object + file format of this machine. + + The .align directive in the HP assembler allows alignments up to 4096 + bytes. 
However, the maximum alignment of a global common symbol is 8 + bytes for objects smaller than the page size (4096 bytes). For larger + objects, the linker provides an alignment of 32 bytes. Unfortunately, + this macro doesn't provide a mechanism to test for common symbols. */ +#define MAX_OFILE_ALIGNMENT 32768 + +/* The SOM linker hardcodes paths into binaries. As a result, dotdots + must be removed from library prefixes to prevent binaries from depending + on the location of the GCC tool directory. The downside is GCC + cannot be moved after installation using a symlink. */ +#define ALWAYS_STRIP_DOTDOT 1 + +/* If GAS supports weak, we can support weak when we have working linker + support for secondary definitions and are generating code for GAS. + This is primarily for one-only support as SOM doesn't allow undefined + weak symbols. */ +#ifdef HAVE_GAS_WEAK +#define TARGET_SUPPORTS_WEAK (TARGET_SOM_SDEF && TARGET_GAS) +#else +#define TARGET_SUPPORTS_WEAK 0 +#endif + +/* CVS GAS as of 4/28/04 supports a comdat parameter for the .nsubspa + directive. This provides one-only linkage semantics even though we + don't have weak support. */ +#ifdef HAVE_GAS_NSUBSPA_COMDAT +#define SUPPORTS_SOM_COMDAT (TARGET_GAS) +#else +#define SUPPORTS_SOM_COMDAT 0 +#endif + +/* We can support one only if we support weak or comdat. */ +#define SUPPORTS_ONE_ONLY (TARGET_SUPPORTS_WEAK || SUPPORTS_SOM_COMDAT) + +/* We use DECL_COMMON for uninitialized one-only variables as we don't + have linkonce .bss. We use SOM secondary definitions or comdat for + initialized variables and functions. */ +#define MAKE_DECL_ONE_ONLY(DECL) \ + do { \ + if (TREE_CODE (DECL) == VAR_DECL \ + && (DECL_INITIAL (DECL) == 0 \ + || DECL_INITIAL (DECL) == error_mark_node)) \ + DECL_COMMON (DECL) = 1; \ + else if (TARGET_SUPPORTS_WEAK) \ + DECL_WEAK (DECL) = 1; \ + } while (0) + +/* This is how we tell the assembler that a symbol is weak. The SOM + weak implementation uses the secondary definition (sdef) flag. + + The behavior of sdef symbols is similar to ELF weak symbols in that + multiple definitions can occur without incurring a link error. + However, they differ in the following ways: + 1) Undefined sdef symbols are not allowed. + 2) The linker searches for undefined sdef symbols and will load an + archive library member to resolve an undefined sdef symbol. + 3) The exported symbol from a shared library is a primary symbol + rather than a sdef symbol. Thus, more care is needed in the + ordering of libraries. + + It appears that the linker discards extra copies of "weak" functions + when linking shared libraries, independent of whether or not they + are in their own section. In linking final executables, -Wl,-O can + be used to remove dead procedures. Thus, support for named sections + is not needed and in previous testing caused problems with various + HP tools. */ +#define ASM_WEAKEN_LABEL(FILE,NAME) \ + do { fputs ("\t.weak\t", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + targetm.asm_out.globalize_label (FILE, NAME); \ + } while (0) + +/* We can't handle weak aliases, and therefore can't support pragma weak. + Suppress the use of pragma weak in gthr-dce.h and gthr-posix.h. */ +#define GTHREAD_USE_WEAK 0 + +/* Shared library suffix. Collect2 strips the version string after + this suffix when generating constructor/destructor names. 
*/ +#define SHLIB_SUFFIX ".sl" diff --git a/gcc/config/pa/stublib.c b/gcc/config/pa/stublib.c new file mode 100644 index 000000000..d3cf559c8 --- /dev/null +++ b/gcc/config/pa/stublib.c @@ -0,0 +1,97 @@ +/* Stub functions. + Copyright (C) 2006, 2009, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifdef L_register_frame_info +struct object; +void __register_frame_info (const void * __attribute__((unused)), + struct object * __attribute__((unused))); +void +__register_frame_info (const void *p, struct object *ob) +{ +} +#endif + +#ifdef L_deregister_frame_info +void *__deregister_frame_info (const void * __attribute__((unused))); +void * +__deregister_frame_info (const void *p) +{ + return (void *)0; +} +#endif + +#ifdef L_cxa_finalize +void __cxa_finalize (void * __attribute__((unused))); +void +__cxa_finalize (void *p) +{ +} +#endif + +#ifdef L_Jv_RegisterClasses +void _Jv_RegisterClasses (void * __attribute__((unused))); +void +_Jv_RegisterClasses (void *p) +{ +} +#endif + +#ifdef L_pthread_default_stacksize_np +int pthread_default_stacksize_np (unsigned long __attribute__((unused)), + unsigned long *); +int +pthread_default_stacksize_np (unsigned long new, unsigned long *old) +{ + if (old) + *old = 0; + return 0; +} +#endif + +#ifdef L_pthread_mutex_lock +int pthread_mutex_lock (void); +int +pthread_mutex_lock (void) +{ + return 0; +} +#endif + +#ifdef L_pthread_mutex_unlock +int pthread_mutex_unlock (void); +int +pthread_mutex_unlock (void) +{ + return 0; +} +#endif + +#ifdef L_pthread_once +int pthread_once (void); +int +pthread_once (void) +{ + return 0; +} +#endif diff --git a/gcc/config/pa/t-dce-thr b/gcc/config/pa/t-dce-thr new file mode 100644 index 000000000..8d86a4181 --- /dev/null +++ b/gcc/config/pa/t-dce-thr @@ -0,0 +1,5 @@ +MULTILIB_OPTIONS = threads +MULTILIB_DIRNAMES = threads + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/pa/t-hpux-shlib b/gcc/config/pa/t-hpux-shlib new file mode 100644 index 000000000..d5a5b6c86 --- /dev/null +++ b/gcc/config/pa/t-hpux-shlib @@ -0,0 +1,46 @@ +# Copyright (C) 2001, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Build a shared libgcc library. +SHLIB_EXT = .sl +SHLIB_NAME = @shlib_base_name@$(SHLIB_EXT) +SHLIB_SOVERSION = 1 +SHLIB_SONAME = @shlib_base_name@.$(SHLIB_SOVERSION) +SHLIB_OBJS = @shlib_objs@ +SHLIB_DIR = @multilib_dir@ +SHLIB_SLIBDIR_QUAL = @shlib_slibdir_qual@ + +SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \ + -Wl,+h -Wl,$(SHLIB_SONAME) \ + -o $(SHLIB_DIR)/$(SHLIB_NAME).tmp @multilib_flags@ $(SHLIB_OBJS) && \ + rm -f $(SHLIB_DIR)/$(SHLIB_SONAME) && \ + if [ -f $(SHLIB_DIR)/$(SHLIB_NAME) ]; then \ + mv -f $(SHLIB_DIR)/$(SHLIB_NAME) $(SHLIB_DIR)/$(SHLIB_NAME).backup; \ + else true; fi && \ + mv $(SHLIB_DIR)/$(SHLIB_NAME).tmp $(SHLIB_DIR)/$(SHLIB_NAME) && \ + $(LN_S) $(SHLIB_NAME) $(SHLIB_DIR)/$(SHLIB_SONAME) + +# $(slibdir) double quoted to protect it from expansion while building +# libgcc.mk. We want this delayed until actual install time. +SHLIB_INSTALL = \ + $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \ + $(INSTALL_DATA) -m 555 $(SHLIB_DIR)/$(SHLIB_NAME) \ + $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \ + rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_NAME); \ + $(LN_S) $(SHLIB_SONAME) \ + $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_NAME) diff --git a/gcc/config/pa/t-linux b/gcc/config/pa/t-linux new file mode 100644 index 000000000..ba42ad03c --- /dev/null +++ b/gcc/config/pa/t-linux @@ -0,0 +1,39 @@ +# Copyright (C) 1999, 2001, 2002, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +#Plug millicode routines into libgcc.a We want these on both native and +#cross compiles. We use the "64-bit" routines because the "32-bit" code +#is broken for certain corner cases. + +LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall +LIB1ASMSRC = pa/milli64.S + +# Compile libgcc2.a as PIC. +TARGET_LIBGCC2_CFLAGS = -fPIC -DELF=1 -DLINUX=1 + +LIB2FUNCS_EXTRA=fptr.c +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c + +fptr.c: $(srcdir)/config/pa/fptr.c + rm -f fptr.c + cp $(srcdir)/config/pa/fptr.c . + +# Compile crtbeginS.o and crtendS.o as PIC. +CRTSTUFF_T_CFLAGS_S = -fPIC + +MULTIARCH_DIRNAME = $(call if_multiarch,hppa-linux-gnu) diff --git a/gcc/config/pa/t-linux64 b/gcc/config/pa/t-linux64 new file mode 100644 index 000000000..cfa73606f --- /dev/null +++ b/gcc/config/pa/t-linux64 @@ -0,0 +1,34 @@ +# Copyright (C) 2001, 2008, 2012 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +#Plug millicode routines into libgcc.a We want these on both native and +#cross compiles. + +LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI +LIB1ASMSRC = pa/milli64.S + +# Compile crtbeginS.o and crtendS.o as PIC. +# Actually, hppa64 is always PIC but adding -fPIC does no harm. +CRTSTUFF_T_CFLAGS_S = -fPIC + +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c + +# Compile libgcc2.a as PIC. +TARGET_LIBGCC2_CFLAGS = -fPIC -Dpa64=1 -DELF=1 + +MULTIARCH_DIRNAME = $(call if_multiarch,hppa-linux-gnu) diff --git a/gcc/config/pa/t-pa b/gcc/config/pa/t-pa new file mode 100644 index 000000000..cad060da0 --- /dev/null +++ b/gcc/config/pa/t-pa @@ -0,0 +1,7 @@ +TARGET_LIBGCC2_CFLAGS = -fPIC + +LIB2FUNCS_EXTRA=lib2funcs.asm + +lib2funcs.asm: $(srcdir)/config/pa/lib2funcs.asm + rm -f lib2funcs.asm + cp $(srcdir)/config/pa/lib2funcs.asm . diff --git a/gcc/config/pa/t-pa-hpux b/gcc/config/pa/t-pa-hpux new file mode 100644 index 000000000..63eab6362 --- /dev/null +++ b/gcc/config/pa/t-pa-hpux @@ -0,0 +1,7 @@ +lib2funcs.asm: $(srcdir)/config/pa/lib2funcs.asm + rm -f lib2funcs.asm + cp $(srcdir)/config/pa/lib2funcs.asm . + +quadlib.c: $(srcdir)/config/pa/quadlib.c + rm -f quadlib.c + cp $(srcdir)/config/pa/quadlib.c . diff --git a/gcc/config/pa/t-pa-hpux10 b/gcc/config/pa/t-pa-hpux10 new file mode 100644 index 000000000..fd7ff4842 --- /dev/null +++ b/gcc/config/pa/t-pa-hpux10 @@ -0,0 +1,2 @@ +TARGET_LIBGCC2_CFLAGS = -fPIC -frandom-seed=fixed-seed -D_T_HPUX10 +LIB2FUNCS_EXTRA=lib2funcs.asm quadlib.c diff --git a/gcc/config/pa/t-pa-hpux11 b/gcc/config/pa/t-pa-hpux11 new file mode 100644 index 000000000..4436b4ca6 --- /dev/null +++ b/gcc/config/pa/t-pa-hpux11 @@ -0,0 +1,31 @@ +TARGET_LIBGCC2_CFLAGS = -fPIC -frandom-seed=fixed-seed +LIB2FUNCS_EXTRA=lib2funcs.asm quadlib.c +LIBGCCSTUB_OBJS = pthread_default_stacksize_np-stub.o \ + pthread_mutex_lock-stub.o \ + pthread_mutex_unlock-stub.o \ + pthread_once-stub.o + +stublib.c: $(srcdir)/config/pa/stublib.c + rm -f stublib.c + cp $(srcdir)/config/pa/stublib.c . + +pthread_default_stacksize_np-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_default_stacksize_np stublib.c \ + -o pthread_default_stacksize_np-stub.o + +pthread_mutex_lock-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_lock stublib.c \ + -o pthread_mutex_lock-stub.o + +pthread_mutex_unlock-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_unlock stublib.c \ + -o pthread_mutex_unlock-stub.o + +pthread_once-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_once stublib.c \ + -o pthread_once-stub.o + +$(T)libgcc_stub.a: $(LIBGCCSTUB_OBJS) + -rm -rf $(T)libgcc_stub.a + $(AR) rc $(T)libgcc_stub.a $(LIBGCCSTUB_OBJS) + $(RANLIB) $(T)libgcc_stub.a diff --git a/gcc/config/pa/t-pa64 b/gcc/config/pa/t-pa64 new file mode 100644 index 000000000..e6ac7a5bb --- /dev/null +++ b/gcc/config/pa/t-pa64 @@ -0,0 +1,67 @@ +# Copyright (C) 2000, 2001, 2002, 2004, 2006, +# 2007, 2010 Free Software Foundation, Inc. +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +TARGET_LIBGCC2_CFLAGS = -fPIC -Dpa64=1 -DELF=1 -mlong-calls +LIB2FUNCS_EXTRA = quadlib.c +LIBGCCSTUB_OBJS = rfi-stub.o dfi-stub.o jvrc-stub.o cxaf-stub.o \ + pthread_default_stacksize_np-stub.o \ + pthread_mutex_lock-stub.o \ + pthread_mutex_unlock-stub.o \ + pthread_once-stub.o + +stublib.c: $(srcdir)/config/pa/stublib.c + rm -f stublib.c + cp $(srcdir)/config/pa/stublib.c . + +rfi-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_register_frame_info stublib.c \ + -o rfi-stub.o + +dfi-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_deregister_frame_info stublib.c \ + -o dfi-stub.o + +cxaf-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_cxa_finalize stublib.c \ + -o cxaf-stub.o + +jvrc-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_Jv_RegisterClasses stublib.c \ + -o jvrc-stub.o + +pthread_default_stacksize_np-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_default_stacksize_np stublib.c \ + -o pthread_default_stacksize_np-stub.o + +pthread_mutex_lock-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_lock stublib.c \ + -o pthread_mutex_lock-stub.o + +pthread_mutex_unlock-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_unlock stublib.c \ + -o pthread_mutex_unlock-stub.o + +pthread_once-stub.o: stublib.c $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -O2 -DL_pthread_once stublib.c \ + -o pthread_once-stub.o + +$(T)libgcc_stub.a: $(LIBGCCSTUB_OBJS) + -rm -rf $(T)libgcc_stub.a + $(AR) rc $(T)libgcc_stub.a $(LIBGCCSTUB_OBJS) + $(RANLIB) $(T)libgcc_stub.a diff --git a/gcc/config/pa/t-slibgcc-dwarf-ver b/gcc/config/pa/t-slibgcc-dwarf-ver new file mode 100644 index 000000000..fa4688d69 --- /dev/null +++ b/gcc/config/pa/t-slibgcc-dwarf-ver @@ -0,0 +1,3 @@ +# Set the version number of the shared libgcc library (DWARF2 EH). + +SHLIB_SOVERSION = 4 diff --git a/gcc/config/pa/t-slibgcc-sjlj-ver b/gcc/config/pa/t-slibgcc-sjlj-ver new file mode 100644 index 000000000..00140cf20 --- /dev/null +++ b/gcc/config/pa/t-slibgcc-sjlj-ver @@ -0,0 +1,3 @@ +# Set the version number of the shared libgcc library (SJLJ EH). + +SHLIB_SOVERSION = 3 |