From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001
From: upstream source tree
Date: Sun, 15 Mar 2015 20:14:05 -0400
Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified
 gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified
 upstream tarball.

Downloading a git-generated archive based on the 'upstream' tag should
provide you with a source tree that is binary identical to the one
extracted from the above tarball.

If you have obtained the source via the command 'git clone', however, do
note that line-endings of files in your working directory might differ
from line-endings of the respective files in the upstream repository.
---
 gcc/config/i386/i386.md | 18347 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 18347 insertions(+)
 create mode 100644 gcc/config/i386/i386.md

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
new file mode 100644
index 000000000..3a27ca46c
--- /dev/null
+++ b/gcc/config/i386/i386.md
@@ -0,0 +1,18347 @@
+;; GCC machine description for IA-32 and x86-64.
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;; Mostly by William Schelter.
+;; x86_64 support added by Jan Hubicka
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.  */
+;;
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+;;
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+;;
+;; The special asm out single letter directives following a '%' are:
+;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
+;; C -- print opcode suffix for set/cmov insn.
+;; c -- like C, but print reversed condition
+;; F,f -- likewise, but for floating-point.
+;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
+;;      otherwise nothing
+;; R -- print the prefix for register names.
+;; z -- print the opcode suffix for the size of the current operand.
+;; Z -- likewise, with special suffixes for x87 instructions.
+;; * -- print a star (in certain assembler syntax)
+;; A -- print an absolute memory reference.
+;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
+;; s -- print a shift double count, followed by the assembler's argument
+;;      delimiter.
+;; b -- print the QImode name of the register for the indicated operand.
+;;      %b0 would print %al if operands[0] is reg 0.
+;; w -- likewise, print the HImode name of the register.
+;; k -- likewise, print the SImode name of the register.
+;; q -- likewise, print the DImode name of the register.
+;; x -- likewise, print the V4SFmode name of the register.
+;; t -- likewise, print the V8SFmode name of the register.
+;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
+;; y -- print "st(0)" instead of "st" as a register. +;; d -- print duplicated register operand for AVX instruction. +;; D -- print condition for SSE cmp instruction. +;; P -- if PIC, print an @PLT suffix. +;; X -- don't print any sort of PIC '@' suffix for a symbol. +;; & -- print some in-use local-dynamic symbol name. +;; H -- print a memory address offset by 8; used for sse high-parts +;; Y -- print condition for XOP pcom* instruction. +;; + -- print a branch hint as 'cs' or 'ds' prefix +;; ; -- print a semicolon (after prefixes due to bug in older gas). +;; @ -- print a segment register of thread base pointer load + +;; UNSPEC usage: + +(define_c_enum "unspec" [ + ;; Relocation specifiers + UNSPEC_GOT + UNSPEC_GOTOFF + UNSPEC_GOTPCREL + UNSPEC_GOTTPOFF + UNSPEC_TPOFF + UNSPEC_NTPOFF + UNSPEC_DTPOFF + UNSPEC_GOTNTPOFF + UNSPEC_INDNTPOFF + UNSPEC_PLTOFF + UNSPEC_MACHOPIC_OFFSET + UNSPEC_PCREL + + ;; Prologue support + UNSPEC_STACK_ALLOC + UNSPEC_SET_GOT + UNSPEC_REG_SAVE + UNSPEC_DEF_CFA + UNSPEC_SET_RIP + UNSPEC_SET_GOT_OFFSET + UNSPEC_MEMORY_BLOCKAGE + UNSPEC_STACK_CHECK + + ;; TLS support + UNSPEC_TP + UNSPEC_TLS_GD + UNSPEC_TLS_LD_BASE + UNSPEC_TLSDESC + UNSPEC_TLS_IE_SUN + + ;; Other random patterns + UNSPEC_SCAS + UNSPEC_FNSTSW + UNSPEC_SAHF + UNSPEC_PARITY + UNSPEC_FSTCW + UNSPEC_ADD_CARRY + UNSPEC_FLDCW + UNSPEC_REP + UNSPEC_LD_MPIC ; load_macho_picbase + UNSPEC_TRUNC_NOOP + UNSPEC_DIV_ALREADY_SPLIT + UNSPEC_CALL_NEEDS_VZEROUPPER + + ;; For SSE/MMX support: + UNSPEC_FIX_NOTRUNC + UNSPEC_MASKMOV + UNSPEC_MOVMSK + UNSPEC_MOVNT + UNSPEC_MOVU + UNSPEC_RCP + UNSPEC_RSQRT + UNSPEC_SFENCE + UNSPEC_PFRCP + UNSPEC_PFRCPIT1 + UNSPEC_PFRCPIT2 + UNSPEC_PFRSQRT + UNSPEC_PFRSQIT1 + UNSPEC_MFENCE + UNSPEC_LFENCE + UNSPEC_PSADBW + UNSPEC_LDDQU + UNSPEC_MS_TO_SYSV_CALL + + ;; Generic math support + UNSPEC_COPYSIGN + UNSPEC_IEEE_MIN ; not commutative + UNSPEC_IEEE_MAX ; not commutative + + ;; x87 Floating point + UNSPEC_SIN + UNSPEC_COS + UNSPEC_FPATAN + UNSPEC_FYL2X + UNSPEC_FYL2XP1 + UNSPEC_FRNDINT + UNSPEC_FIST + UNSPEC_F2XM1 + UNSPEC_TAN + UNSPEC_FXAM + + ;; x87 Rounding + UNSPEC_FRNDINT_FLOOR + UNSPEC_FRNDINT_CEIL + UNSPEC_FRNDINT_TRUNC + UNSPEC_FRNDINT_MASK_PM + UNSPEC_FIST_FLOOR + UNSPEC_FIST_CEIL + + ;; x87 Double output FP + UNSPEC_SINCOS_COS + UNSPEC_SINCOS_SIN + UNSPEC_XTRACT_FRACT + UNSPEC_XTRACT_EXP + UNSPEC_FSCALE_FRACT + UNSPEC_FSCALE_EXP + UNSPEC_FPREM_F + UNSPEC_FPREM_U + UNSPEC_FPREM1_F + UNSPEC_FPREM1_U + + UNSPEC_C2_FLAG + UNSPEC_FXAM_MEM + + ;; SSP patterns + UNSPEC_SP_SET + UNSPEC_SP_TEST + UNSPEC_SP_TLS_SET + UNSPEC_SP_TLS_TEST + + ;; SSSE3 + UNSPEC_PSHUFB + UNSPEC_PSIGN + UNSPEC_PALIGNR + + ;; For SSE4A support + UNSPEC_EXTRQI + UNSPEC_EXTRQ + UNSPEC_INSERTQI + UNSPEC_INSERTQ + + ;; For SSE4.1 support + UNSPEC_BLENDV + UNSPEC_INSERTPS + UNSPEC_DP + UNSPEC_MOVNTDQA + UNSPEC_MPSADBW + UNSPEC_PHMINPOSUW + UNSPEC_PTEST + UNSPEC_ROUND + + ;; For SSE4.2 support + UNSPEC_CRC32 + UNSPEC_PCMPESTR + UNSPEC_PCMPISTR + + ;; For FMA4 support + UNSPEC_FMADDSUB + UNSPEC_XOP_UNSIGNED_CMP + UNSPEC_XOP_TRUEFALSE + UNSPEC_XOP_PERMUTE + UNSPEC_FRCZ + + ;; For AES support + UNSPEC_AESENC + UNSPEC_AESENCLAST + UNSPEC_AESDEC + UNSPEC_AESDECLAST + UNSPEC_AESIMC + UNSPEC_AESKEYGENASSIST + + ;; For PCLMUL support + UNSPEC_PCLMUL + + ;; For AVX support + UNSPEC_PCMP + UNSPEC_VPERMIL + UNSPEC_VPERMIL2 + UNSPEC_VPERMIL2F128 + UNSPEC_MASKLOAD + UNSPEC_MASKSTORE + UNSPEC_CAST + UNSPEC_VTESTP + UNSPEC_VCVTPH2PS + UNSPEC_VCVTPS2PH + + ;; For BMI support + UNSPEC_BEXTR +]) + +(define_c_enum "unspecv" [ + 
UNSPECV_BLOCKAGE + UNSPECV_STACK_PROBE + UNSPECV_PROBE_STACK_RANGE + UNSPECV_EMMS + UNSPECV_LDMXCSR + UNSPECV_STMXCSR + UNSPECV_FEMMS + UNSPECV_CLFLUSH + UNSPECV_ALIGN + UNSPECV_MONITOR + UNSPECV_MWAIT + UNSPECV_CMPXCHG + UNSPECV_XCHG + UNSPECV_LOCK + UNSPECV_PROLOGUE_USE + UNSPECV_CLD + UNSPECV_NOPS + UNSPECV_VZEROALL + UNSPECV_VZEROUPPER + UNSPECV_RDTSC + UNSPECV_RDTSCP + UNSPECV_RDPMC + UNSPECV_LLWP_INTRINSIC + UNSPECV_SLWP_INTRINSIC + UNSPECV_LWPVAL_INTRINSIC + UNSPECV_LWPINS_INTRINSIC + UNSPECV_RDFSBASE + UNSPECV_RDGSBASE + UNSPECV_WRFSBASE + UNSPECV_WRGSBASE + UNSPECV_SPLIT_STACK_RETURN + + ;; For RDRAND support + UNSPECV_RDRAND +]) + +;; Constants to represent pcomtrue/pcomfalse variants +(define_constants + [(PCOM_FALSE 0) + (PCOM_TRUE 1) + (COM_FALSE_S 2) + (COM_FALSE_P 3) + (COM_TRUE_S 4) + (COM_TRUE_P 5) + ]) + +;; Constants used in the XOP pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + +;; Registers by name. +(define_constants + [(AX_REG 0) + (DX_REG 1) + (CX_REG 2) + (BX_REG 3) + (SI_REG 4) + (DI_REG 5) + (BP_REG 6) + (SP_REG 7) + (ST0_REG 8) + (ST1_REG 9) + (ST2_REG 10) + (ST3_REG 11) + (ST4_REG 12) + (ST5_REG 13) + (ST6_REG 14) + (ST7_REG 15) + (FLAGS_REG 17) + (FPSR_REG 18) + (FPCR_REG 19) + (XMM0_REG 21) + (XMM1_REG 22) + (XMM2_REG 23) + (XMM3_REG 24) + (XMM4_REG 25) + (XMM5_REG 26) + (XMM6_REG 27) + (XMM7_REG 28) + (MM0_REG 29) + (MM1_REG 30) + (MM2_REG 31) + (MM3_REG 32) + (MM4_REG 33) + (MM5_REG 34) + (MM6_REG 35) + (MM7_REG 36) + (R8_REG 37) + (R9_REG 38) + (R10_REG 39) + (R11_REG 40) + (R12_REG 41) + (R13_REG 42) + (XMM8_REG 45) + (XMM9_REG 46) + (XMM10_REG 47) + (XMM11_REG 48) + (XMM12_REG 49) + (XMM13_REG 50) + (XMM14_REG 51) + (XMM15_REG 52) + ]) + +;; Insns whose names begin with "x86_" are emitted by gen_FOO calls +;; from i386.c. + +;; In C guard expressions, put expressions which may be compile-time +;; constants first. This allows for better optimization. For +;; example, write "TARGET_64BIT && reload_completed", not +;; "reload_completed && TARGET_64BIT". + + +;; Processor type. +(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, + atom,generic64,amdfam10,bdver1,btver1" + (const (symbol_ref "ix86_schedule"))) + +;; A basic instruction type. Refinements due to arguments to be +;; provided in other attributes. +(define_attr "type" + "other,multi, + alu,alu1,negnot,imov,imovx,lea, + incdec,ishift,ishift1,rotate,rotate1,imul,idiv, + icmp,test,ibr,setcc,icmov, + push,pop,call,callv,leave, + str,bitmanip, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, + sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, + ssemuladd,sse4arg,lwp, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" + (const_string "other")) + +;; Main data type used by the insn +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF" + (const_string "unknown")) + +;; The CPU unit operations uses. 
+(define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") + (const_string "i387") + (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt, + ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") + (eq_attr "type" "other") + (const_string "unknown")] + (const_string "integer"))) + +;; The (bounding maximum) length of an instruction immediate. +(define_attr "length_immediate" "" + (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, + bitmanip") + (const_int 0) + (eq_attr "unit" "i387,sse,mmx") + (const_int 0) + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1, + imul,icmp,push,pop") + (symbol_ref "ix86_attr_length_immediate_default(insn,1)") + (eq_attr "type" "imov,test") + (symbol_ref "ix86_attr_length_immediate_default(insn,0)") + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + ;; We don't know the size before shorten_branches. Expect + ;; the instruction to fit for better scheduling. + (eq_attr "type" "ibr") + (const_int 1) + ] + (symbol_ref "/* Update immediate_length and other attributes! */ + gcc_unreachable (),1"))) + +;; The (bounding maximum) length of an instruction address. +(define_attr "length_address" "" + (cond [(eq_attr "type" "str,other,multi,fxch") + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + ] + (symbol_ref "ix86_attr_length_address_default (insn)"))) + +;; Set when length prefix is used. +(define_attr "prefix_data16" "" + (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1") + (const_int 0) + (eq_attr "mode" "HI") + (const_int 1) + (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI")) + (const_int 1) + ] + (const_int 0))) + +;; Set when string REP prefix is used. +(define_attr "prefix_rep" "" + (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1") + (const_int 0) + (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) + (const_int 1) + ] + (const_int 0))) + +;; Set when 0f opcode prefix is used. +(define_attr "prefix_0f" "" + (if_then_else + (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip") + (eq_attr "unit" "sse,mmx")) + (const_int 1) + (const_int 0))) + +;; Set when REX opcode prefix is used. +(define_attr "prefix_rex" "" + (cond [(eq (symbol_ref "TARGET_64BIT") (const_int 0)) + (const_int 0) + (and (eq_attr "mode" "DI") + (and (eq_attr "type" "!push,pop,call,callv,leave,ibr") + (eq_attr "unit" "!mmx"))) + (const_int 1) + (and (eq_attr "mode" "QI") + (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)") + (const_int 0))) + (const_int 1) + (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)") + (const_int 0)) + (const_int 1) + (and (eq_attr "type" "imovx") + (match_operand:QI 1 "ext_QIreg_operand" "")) + (const_int 1) + ] + (const_int 0))) + +;; There are also additional prefixes in 3DNOW, SSSE3. +;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte, +;; sseiadd1,ssecvt1 to 0f7a with no DREX byte. +;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a. 
+(define_attr "prefix_extra" "" + (cond [(eq_attr "type" "ssemuladd,sse4arg") + (const_int 2) + (eq_attr "type" "sseiadd1,ssecvt1") + (const_int 1) + ] + (const_int 0))) + +;; Prefix used: original, VEX or maybe VEX. +(define_attr "prefix" "orig,vex,maybe_vex" + (if_then_else (eq_attr "mode" "OI,V8SF,V4DF") + (const_string "vex") + (const_string "orig"))) + +;; VEX W bit is used. +(define_attr "prefix_vex_w" "" (const_int 0)) + +;; The length of VEX prefix +;; Only instructions with 0f prefix can have 2 byte VEX prefix, +;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is +;; still prefix_0f 1, with prefix_extra 1. +(define_attr "length_vex" "" + (if_then_else (and (eq_attr "prefix_0f" "1") + (eq_attr "prefix_extra" "0")) + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)") + (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)")) + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)") + (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)")))) + +;; Set when modrm byte is used. +(define_attr "modrm" "" + (cond [(eq_attr "type" "str,leave") + (const_int 0) + (eq_attr "unit" "i387") + (const_int 0) + (and (eq_attr "type" "incdec") + (and (eq (symbol_ref "TARGET_64BIT") (const_int 0)) + (ior (match_operand:SI 1 "register_operand" "") + (match_operand:HI 1 "register_operand" "")))) + (const_int 0) + (and (eq_attr "type" "push") + (not (match_operand 1 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "pop") + (not (match_operand 0 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "imov") + (and (not (eq_attr "mode" "DI")) + (ior (and (match_operand 0 "register_operand" "") + (match_operand 1 "immediate_operand" "")) + (ior (and (match_operand 0 "ax_reg_operand" "") + (match_operand 1 "memory_displacement_only_operand" "")) + (and (match_operand 0 "memory_displacement_only_operand" "") + (match_operand 1 "ax_reg_operand" "")))))) + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "alu,alu1,icmp,test") + (match_operand 0 "ax_reg_operand" "")) + (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))") + ] + (const_int 1))) + +;; The (bounding maximum) length of an instruction in bytes. +;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences. +;; Later we may want to split them and compute proper length as for +;; other insns. +(define_attr "length" "" + (cond [(eq_attr "type" "other,multi,fistp,frndint") + (const_int 16) + (eq_attr "type" "fcmp") + (const_int 4) + (eq_attr "unit" "i387") + (plus (const_int 2) + (plus (attr "prefix_data16") + (attr "length_address"))) + (ior (eq_attr "prefix" "vex") + (and (eq_attr "prefix" "maybe_vex") + (ne (symbol_ref "TARGET_AVX") (const_int 0)))) + (plus (attr "length_vex") + (plus (attr "length_immediate") + (plus (attr "modrm") + (attr "length_address"))))] + (plus (plus (attr "modrm") + (plus (attr "prefix_0f") + (plus (attr "prefix_rex") + (plus (attr "prefix_extra") + (const_int 1))))) + (plus (attr "prefix_rep") + (plus (attr "prefix_data16") + (plus (attr "length_immediate") + (attr "length_address"))))))) + +;; The `memory' attribute is `none' if no memory is referenced, `load' or +;; `store' if there is a simple memory reference therein, or `unknown' +;; if the instruction is complex. 
+ +(define_attr "memory" "none,load,store,both,unknown" + (cond [(eq_attr "type" "other,multi,str,lwp") + (const_string "unknown") + (eq_attr "type" "lea,fcmov,fpspc") + (const_string "none") + (eq_attr "type" "fistp,leave") + (const_string "both") + (eq_attr "type" "frndint") + (const_string "load") + (eq_attr "type" "push") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "both") + (const_string "store")) + (eq_attr "type" "pop") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "both") + (const_string "load")) + (eq_attr "type" "setcc") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "store") + (const_string "none")) + (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp") + (if_then_else (ior (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "load") + (const_string "none")) + (eq_attr "type" "ibr") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "load") + (const_string "none")) + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (and (eq_attr "type" "alu1,negnot,ishift1,sselog1") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (and (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (match_operand 0 "memory_operand" "") + (const_string "store") + (match_operand 1 "memory_operand" "") + (const_string "load") + (and (eq_attr "type" + "!alu1,negnot,ishift1, + imov,imovx,icmp,test,bitmanip, + fmov,fcmp,fsgn, + sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1, + sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt") + (match_operand 2 "memory_operand" "")) + (const_string "load") + (and (eq_attr "type" "icmov,ssemuladd,sse4arg") + (match_operand 3 "memory_operand" "")) + (const_string "load") + ] + (const_string "none"))) + +;; Indicates if an instruction has both an immediate and a displacement. + +(define_attr "imm_disp" "false,true,unknown" + (cond [(eq_attr "type" "other,multi") + (const_string "unknown") + (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "true") + (and (eq_attr "type" "alu,ishift,rotate,imul,idiv") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 2 "immediate_operand" ""))) + (const_string "true") + ] + (const_string "false"))) + +;; Indicates if an FP operation has an integer source. + +(define_attr "fp_int_src" "false,true" + (const_string "false")) + +;; Defines rounding mode of an FP operation. + +(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" + (const_string "any")) + +;; Define attribute to classify add/sub insns that consumes carry flag (CF) +(define_attr "use_carry" "0,1" (const_string "0")) + +;; Define attribute to indicate unaligned ssemov insns +(define_attr "movu" "0,1" (const_string "0")) + +;; Describe a user's asm statement. 
+(define_asm_attributes
+  [(set_attr "length" "128")
+   (set_attr "type" "multi")])
+
+(define_code_iterator plusminus [plus minus])
+
+(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
+
+;; Base name for define_insn
+(define_code_attr plusminus_insn
+  [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
+   (minus "sub") (ss_minus "sssub") (us_minus "ussub")])
+
+;; Base name for insn mnemonic.
+(define_code_attr plusminus_mnemonic
+  [(plus "add") (ss_plus "adds") (us_plus "addus")
+   (minus "sub") (ss_minus "subs") (us_minus "subus")])
+(define_code_attr plusminus_carry_mnemonic
+  [(plus "adc") (minus "sbb")])
+
+;; Mark commutative operators as such in constraints.
+(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
+                        (minus "") (ss_minus "") (us_minus "")])
+
+;; Mapping of signed max and min
+(define_code_iterator smaxmin [smax smin])
+
+;; Mapping of unsigned max and min
+(define_code_iterator umaxmin [umax umin])
+
+;; Base name for integer and FP insn mnemonic
+(define_code_attr maxmin_int [(smax "maxs") (smin "mins")
+                              (umax "maxu") (umin "minu")])
+(define_code_attr maxmin_float [(smax "max") (smin "min")])
+
+;; Mapping of logic operators
+(define_code_iterator any_logic [and ior xor])
+(define_code_iterator any_or [ior xor])
+
+;; Base name for insn mnemonic.
+(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
+
+;; Mapping of shift-right operators
+(define_code_iterator any_shiftrt [lshiftrt ashiftrt])
+
+;; Base name for define_insn
+(define_code_attr shiftrt_insn [(lshiftrt "lshr") (ashiftrt "ashr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr shiftrt [(lshiftrt "shr") (ashiftrt "sar")])
+
+;; Mapping of rotate operators
+(define_code_iterator any_rotate [rotate rotatert])
+
+;; Base name for define_insn
+(define_code_attr rotate_insn [(rotate "rotl") (rotatert "rotr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr rotate [(rotate "rol") (rotatert "ror")])
+
+;; Mapping of abs neg operators
+(define_code_iterator absneg [abs neg])
+
+;; Base name for x87 insn mnemonic.
+(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")])
+
+;; Used in signed and unsigned widening multiplications.
+(define_code_iterator any_extend [sign_extend zero_extend])
+
+;; Various insn prefixes for signed and unsigned operations.
+(define_code_attr u [(sign_extend "") (zero_extend "u")
+                     (div "") (udiv "u")])
+(define_code_attr s [(sign_extend "s") (zero_extend "u")])
+
+;; Used in signed and unsigned divisions.
+(define_code_iterator any_div [div udiv])
+
+;; Instruction prefix for signed and unsigned operations.
+(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
+                             (div "i") (udiv "")])
+
+;; 64bit single word integer modes.
+(define_mode_iterator SWI1248x [QI HI SI DI])
+
+;; 64bit single word integer modes without QImode and HImode.
+(define_mode_iterator SWI48x [SI DI])
+
+;; Single word integer modes.
+(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
+
+;; Single word integer modes without SImode and DImode.
+(define_mode_iterator SWI12 [QI HI])
+
+;; Single word integer modes without DImode.
+(define_mode_iterator SWI124 [QI HI SI])
+
+;; Single word integer modes without QImode and DImode.
+(define_mode_iterator SWI24 [HI SI])
+
+;; Single word integer modes without QImode.
+(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
+
+;; Single word integer modes without QImode and HImode.
+(define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
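+
+;; (Editorial illustration, not part of the upstream sources: a single
+;; template written against a code iterator and a mode iterator, e.g.
+;;
+;;   (define_insn "*<plusminus_insn><mode>3"
+;;     [(set (match_operand:SWI48 0 "register_operand" "=r")
+;;           (plusminus:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+;;                            (match_operand:SWI48 2 "general_operand" "rme")))]
+;;     ...)
+;;
+;; is expanded by the md reader once per code and once per mode, giving
+;; *addsi3, *adddi3, *subsi3 and *subdi3 variants; <plusminus_insn> and
+;; <mode> are replaced by the matching code/mode attribute values.)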
+
+;; All math-dependent single and double word integer modes.
+(define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
+                             (HI "TARGET_HIMODE_MATH")
+                             SI DI (TI "TARGET_64BIT")])
+
+;; Math-dependent single word integer modes.
+(define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
+                            (HI "TARGET_HIMODE_MATH")
+                            SI (DI "TARGET_64BIT")])
+
+;; Math-dependent single word integer modes without DImode.
+(define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
+                               (HI "TARGET_HIMODE_MATH")
+                               SI])
+
+;; Math-dependent single word integer modes without QImode.
+(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
+                               SI (DI "TARGET_64BIT")])
+
+;; Double word integer modes.
+(define_mode_iterator DWI [(DI "!TARGET_64BIT")
+                           (TI "TARGET_64BIT")])
+
+;; Double word integer modes as mode attribute.
+(define_mode_attr DWI [(SI "DI") (DI "TI")])
+(define_mode_attr dwi [(SI "di") (DI "ti")])
+
+;; Half mode for double word integer modes.
+(define_mode_iterator DWIH [(SI "!TARGET_64BIT")
+                            (DI "TARGET_64BIT")])
+
+;; Instruction suffix for integer modes.
+(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+
+;; Pointer size prefix for integer modes (Intel asm dialect)
+(define_mode_attr iptrsize [(QI "BYTE")
+                            (HI "WORD")
+                            (SI "DWORD")
+                            (DI "QWORD")])
+
+;; Register class for integer modes.
+(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
+
+;; Immediate operand constraint for integer modes.
+(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")])
+
+;; General operand constraint for word modes.
+(define_mode_attr g [(QI "qmn") (HI "rmn") (SI "g") (DI "rme")])
+
+;; Immediate operand constraint for double integer modes.
+(define_mode_attr di [(SI "iF") (DI "e")])
+
+;; Immediate operand constraint for shifts.
+(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
+
+;; General operand predicate for integer modes.
+(define_mode_attr general_operand
+        [(QI "general_operand")
+         (HI "general_operand")
+         (SI "general_operand")
+         (DI "x86_64_general_operand")
+         (TI "x86_64_general_operand")])
+
+;; General sign/zero extend operand predicate for integer modes.
+(define_mode_attr general_szext_operand
+        [(QI "general_operand")
+         (HI "general_operand")
+         (SI "general_operand")
+         (DI "x86_64_szext_general_operand")])
+
+;; Immediate operand predicate for integer modes.
+(define_mode_attr immediate_operand
+        [(QI "immediate_operand")
+         (HI "immediate_operand")
+         (SI "immediate_operand")
+         (DI "x86_64_immediate_operand")])
+
+;; Nonmemory operand predicate for integer modes.
+(define_mode_attr nonmemory_operand
+        [(QI "nonmemory_operand")
+         (HI "nonmemory_operand")
+         (SI "nonmemory_operand")
+         (DI "x86_64_nonmemory_operand")])
+
+;; Operand predicate for shifts.
+(define_mode_attr shift_operand
+        [(QI "nonimmediate_operand")
+         (HI "nonimmediate_operand")
+         (SI "nonimmediate_operand")
+         (DI "shiftdi_operand")
+         (TI "register_operand")])
+
+;; Operand predicate for shift argument.
+(define_mode_attr shift_immediate_operand
+        [(QI "const_1_to_31_operand")
+         (HI "const_1_to_31_operand")
+         (SI "const_1_to_31_operand")
+         (DI "const_1_to_63_operand")])
+
+;; Input operand predicate for arithmetic left shifts.
+(define_mode_attr ashl_input_operand
+        [(QI "nonimmediate_operand")
+         (HI "nonimmediate_operand")
+         (SI "nonimmediate_operand")
+         (DI "ashldi_input_operand")
+         (TI "reg_or_pm1_operand")])
+
+;; SSE and x87 SFmode and DFmode floating point modes
+(define_mode_iterator MODEF [SF DF])
+
+;; All x87 floating point modes
+(define_mode_iterator X87MODEF [SF DF XF])
+
+;; All integer modes handled by x87 fisttp operator.
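+;; (Editorial note, not upstream: fisttp is the SSE3 form of fistp that
+;; always truncates toward zero, e.g.
+;;
+;;   fisttpll (%eax)   ; store %st(0) as a 64-bit integer, truncating, pop
+;;
+;; so it avoids the fldcw rounding-mode save/restore that plain fistp
+;; needs; compare the i387_cw attribute above.)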
+(define_mode_iterator X87MODEI [HI SI DI])
+
+;; All integer modes handled by integer x87 operators.
+(define_mode_iterator X87MODEI12 [HI SI])
+
+;; All integer modes handled by SSE cvtts?2si* operators.
+(define_mode_iterator SSEMODEI24 [SI DI])
+
+;; SSE asm suffix for floating point modes
+(define_mode_attr ssemodefsuffix [(SF "s") (DF "d")])
+
+;; SSE vector mode corresponding to a scalar mode
+(define_mode_attr ssevecmode
+  [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
+
+;; Instruction suffix for REX 64bit operators.
+(define_mode_attr rex64suffix [(SI "") (DI "{q}")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities.  Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; Scheduling descriptions
+
+(include "pentium.md")
+(include "ppro.md")
+(include "k6.md")
+(include "athlon.md")
+(include "bdver1.md")
+(include "geode.md")
+(include "atom.md")
+(include "core2.md")
+
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Compare and branch/compare and store instructions.
+
+(define_expand "cbranch<mode>4"
+  [(set (reg:CC FLAGS_REG)
+        (compare:CC (match_operand:SDWIM 1 "nonimmediate_operand" "")
+                    (match_operand:SDWIM 2 "<general_operand>" "")))
+   (set (pc) (if_then_else
+               (match_operator 0 "ordered_comparison_operator"
+                [(reg:CC FLAGS_REG) (const_int 0)])
+               (label_ref (match_operand 3 "" ""))
+               (pc)))]
+  ""
+{
+  if (MEM_P (operands[1]) && MEM_P (operands[2]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+  ix86_expand_branch (GET_CODE (operands[0]),
+                      operands[1], operands[2], operands[3]);
+  DONE;
+})
+
+(define_expand "cstore<mode>4"
+  [(set (reg:CC FLAGS_REG)
+        (compare:CC (match_operand:SWIM 2 "nonimmediate_operand" "")
+                    (match_operand:SWIM 3 "<general_operand>" "")))
+   (set (match_operand:QI 0 "register_operand" "")
+        (match_operator 1 "ordered_comparison_operator"
+          [(reg:CC FLAGS_REG) (const_int 0)]))]
+  ""
+{
+  if (MEM_P (operands[2]) && MEM_P (operands[3]))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+                     operands[2], operands[3]);
+  DONE;
+})
+
+(define_expand "cmp<mode>_1"
+  [(set (reg:CC FLAGS_REG)
+        (compare:CC (match_operand:SWI48 0 "nonimmediate_operand" "")
+                    (match_operand:SWI48 1 "<general_operand>" "")))])
+
+(define_insn "*cmp<mode>_ccno_1"
+  [(set (reg FLAGS_REG)
+        (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m")
+                 (match_operand:SWI 1 "const0_operand" "")))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "@
+   test{<imodesuffix>}\t%0, %0
+   cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test,icmp")
+   (set_attr "length_immediate" "0,1")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmp<mode>_1"
+  [(set (reg FLAGS_REG)
+        (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+                 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m")))]
+  "ix86_match_ccmode (insn, CCmode)"
+  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmp<mode>_minus_1"
+  [(set (reg FLAGS_REG)
+        (compare
+          (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+                     (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
+          (const_int 0)))]
+  "ix86_match_ccmode (insn, CCGOCmode)"
+  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmpqi_ext_1"
+  [(set (reg FLAGS_REG)
+        (compare
+          (match_operand:QI 0 "general_operand" "Qm")
+          (subreg:QI
+            (zero_extract:SI
+              (match_operand 1 "ext_register_operand" "Q")
+              (const_int 8)
+              (const_int 8)) 0)))]
+  "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (match_operand:QI 0 "register_operand" "Q") + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "const0_operand" "")))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t%h0, %h0" + [(set_attr "type" "test") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_expand "cmpqi_ext_3" + [(set (reg:CC FLAGS_REG) + (compare:CC + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "immediate_operand" "")))]) + +(define_insn "*cmpqi_ext_3_insn" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_operand" "Qmn")))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_3_insn_rex64" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "nonmemory_operand" "Qn")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_4" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +;; These implement float point compares. +;; %%% See if we can get away with VOIDmode operands on the actual insns, +;; which would allow mix and match FP modes on the compares. Which is what +;; the old patterns did, but with many more of them. 
+ +(define_expand "cbranchxf4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:XF 1 "nonmemory_operand" "") + (match_operand:XF 2 "nonmemory_operand" ""))) + (set (pc) (if_then_else + (match_operator 0 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_80387" +{ + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + +(define_expand "cstorexf4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:XF 2 "nonmemory_operand" "") + (match_operand:XF 3 "nonmemory_operand" ""))) + (set (match_operand:QI 0 "register_operand" "") + (match_operator 1 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]))] + "TARGET_80387" +{ + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; +}) + +(define_expand "cbranch4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand" "") + (match_operand:MODEF 2 "cmp_fp_expander_operand" ""))) + (set (pc) (if_then_else + (match_operator 0 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + +(define_expand "cstore4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand" "") + (match_operand:MODEF 3 "cmp_fp_expander_operand" ""))) + (set (match_operand:QI 0 "register_operand" "") + (match_operator 1 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; +}) + +(define_expand "cbranchcc4" + [(set (pc) (if_then_else + (match_operator 0 "comparison_operator" + [(match_operand 1 "flags_reg_operand" "") + (match_operand 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + +(define_expand "cstorecc4" + [(set (match_operand:QI 0 "register_operand" "") + (match_operator 1 "comparison_operator" + [(match_operand 2 "flags_reg_operand" "") + (match_operand 3 "const0_operand" "")]))] + "" +{ + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; +}) + + +;; FP compares, step 1: +;; Set the FP condition codes. +;; +;; CCFPmode compare with exceptions +;; CCFPUmode compare with no exceptions + +;; We may not use "#" to split and emit these, since the REG_DEAD notes +;; used to manage the reg stack popping would not be preserved. 
+ +(define_insn "*cmpfp_0" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" ""))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn_and_split "*cmpfp_0_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" ""))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn "*cmpfp_xf" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn_and_split "*cmpfp_xf_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn "*cmpfp_" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_insn_and_split "*cmpfp__cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_insn "*cmpfp_u" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) 
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "* return output_fp_compare (insn, operands, 0, 1);"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+              (const_string "SF")
+            (match_operand:DF 1 "" "")
+              (const_string "DF")
+           ]
+           (const_string "XF")))])
+
+(define_insn_and_split "*cmpfp_u_cc"
+  [(set (reg:CCFPU FLAGS_REG)
+        (compare:CCFPU
+          (match_operand 1 "register_operand" "f")
+          (match_operand 2 "register_operand" "f")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (unspec:HI
+          [(compare:CCFPU (match_dup 1)(match_dup 2))]
+        UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+        (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+              (const_string "SF")
+            (match_operand:DF 1 "" "")
+              (const_string "DF")
+           ]
+           (const_string "XF")))])
+
+(define_insn "*cmpfp_<mode>"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+        (unspec:HI
+          [(compare:CCFP
+             (match_operand 1 "register_operand" "f")
+             (match_operator 3 "float_operator"
+               [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
+        UNSPEC_FNSTSW))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && (TARGET_USE_<MODE>_FIOP || optimize_function_for_size_p (cfun))
+   && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+  "* return output_fp_compare (insn, operands, 0, 0);"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+  [(set (reg:CCFP FLAGS_REG)
+        (compare:CCFP
+          (match_operand 1 "register_operand" "f")
+          (match_operator 3 "float_operator"
+            [(match_operand:X87MODEI12 2 "memory_operand" "m")])))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && (TARGET_USE_<MODE>_FIOP || optimize_function_for_size_p (cfun))
+   && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (unspec:HI
+          [(compare:CCFP
+             (match_dup 1)
+             (match_op_dup 3 [(match_dup 2)]))]
+        UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+        (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+;; FP compares, step 2
+;; Move the fpsw to ax.
+
+(define_insn "x86_fnstsw_1"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+        (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "fnstsw\t%0"
+  [(set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+   (set_attr "mode" "SI")
+   (set_attr "unit" "i387")])
+
+;; FP compares, step 3
+;; Get ax into flags, general case.
+
+(define_insn "x86_sahf_1"
+  [(set (reg:CC FLAGS_REG)
+        (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
+                   UNSPEC_SAHF))]
+  "TARGET_SAHF"
+{
+#ifndef HAVE_AS_IX86_SAHF
+  if (TARGET_64BIT)
+    return ASM_BYTE "0x9e";
+  else
+#endif
+  return "sahf";
+}
+  [(set_attr "length" "1")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")
+   (set_attr "mode" "SI")])
+
+;; Pentium Pro can do steps 1 through 3 in one go.
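+;; (Editorial illustration, not upstream: with TARGET_CMOVE the three steps
+;; collapse into one instruction that writes EFLAGS directly,
+;;
+;;   fcomi   %st(1), %st  ; compare and set ZF, PF and CF in one go
+;;
+;; as in the *cmpfp_i_* patterns that follow.)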
+;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) +(define_insn "*cmpfp_i_mixed" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "fcmp,ssecomi") + (set_attr "prefix" "orig,maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set (attr "prefix_rep") + (if_then_else (eq_attr "type" "ssecomi") + (const_string "0") + (const_string "*"))) + (set (attr "prefix_data16") + (cond [(eq_attr "type" "fcmp") + (const_string "*") + (eq_attr "mode" "DF") + (const_string "1") + ] + (const_string "0"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*cmpfp_i_sse" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "x") + (match_operand 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "prefix_rep" "0") + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "DF") + (const_string "1") + (const_string "0"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*cmpfp_i_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "fcmp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*cmpfp_iu_mixed" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp,ssecomi") + (set_attr "prefix" "orig,maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set (attr "prefix_rep") + (if_then_else (eq_attr "type" "ssecomi") + (const_string "0") + (const_string "*"))) + (set (attr "prefix_data16") + (cond [(eq_attr "type" "fcmp") + (const_string "*") + (eq_attr "mode" "DF") + (const_string "1") + ] + (const_string "0"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*cmpfp_iu_sse" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "x") + 
(match_operand 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "prefix_rep" "0") + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "DF") + (const_string "1") + (const_string "0"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*cmpfp_iu_387" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct")]) + +;; Push/pop instructions. + +(define_insn "*push2" + [(set (match_operand:DWI 0 "push_operand" "=<") + (match_operand:DWI 1 "general_no_elim_operand" "riF*m"))] + "" + "#") + +(define_split + [(set (match_operand:TI 0 "push_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_64BIT && reload_completed + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushdi2_rex64" + [(set (match_operand:DI 0 "push_operand" "=<,!<") + (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] + "TARGET_64BIT" + "@ + push{q}\t%1 + #" + [(set_attr "type" "push,multi") + (set_attr "mode" "DI")]) + +;; Convert impossible pushes of immediate to existing instructions. +;; First try to get scratch register and go through it. In case this +;; fails, push sign extended lower part first and then overwrite +;; upper part by 32bit move. +(define_peephole2 + [(match_scratch:DI 2 "r") + (set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))]) + +;; We need to define this as both peepholer and splitter for case +;; peephole2 pass is not run. +;; "&& 1" is needed to keep it from matching the previous pattern. +(define_peephole2 + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode) && 1" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]); + + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (4))); +}) + +(define_split + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
+   && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+{
+  split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
+
+  operands[1] = gen_lowpart (DImode, operands[2]);
+  operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+                                                   GEN_INT (4)));
+})
+
+(define_split
+  [(set (match_operand:DI 0 "push_operand" "")
+        (match_operand:DI 1 "general_operand" ""))]
+  "!TARGET_64BIT && reload_completed
+   && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushsi2"
+  [(set (match_operand:SI 0 "push_operand" "=<")
+        (match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+  "!TARGET_64BIT"
+  "push{l}\t%1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+;; emit_push_insn when it calls move_by_pieces requires an insn to
+;; "push a byte/word".  But actually we use pushl, which has the effect
+;; of rounding the amount pushed up to a word.
+
+;; For TARGET_64BIT we always round up to 8 bytes.
+(define_insn "*push<mode>2_rex64"
+  [(set (match_operand:SWI124 0 "push_operand" "=X")
+        (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r"))]
+  "TARGET_64BIT"
+  "push{q}\t%q1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "DI")])
+
+(define_insn "*push<mode>2"
+  [(set (match_operand:SWI12 0 "push_operand" "=X")
+        (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
+  "!TARGET_64BIT"
+  "push{l}\t%k1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+(define_insn "*push<mode>2_prologue"
+  [(set (match_operand:P 0 "push_operand" "=<")
+        (match_operand:P 1 "general_no_elim_operand" "r*m"))
+   (clobber (mem:BLK (scratch)))]
+  ""
+  "push{<imodesuffix>}\t%1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*pop<mode>1"
+  [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
+        (match_operand:P 1 "pop_operand" ">"))]
+  ""
+  "pop{<imodesuffix>}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*pop<mode>1_epilogue"
+  [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
+        (match_operand:P 1 "pop_operand" ">"))
+   (clobber (mem:BLK (scratch)))]
+  ""
+  "pop{<imodesuffix>}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "<MODE>")])
+
+;; Move instructions.
+
+(define_expand "movoi"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "")
+        (match_operand:OI 1 "general_operand" ""))]
+  "TARGET_AVX"
+  "ix86_expand_move (OImode, operands); DONE;")
+
+(define_expand "movti"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+        (match_operand:TI 1 "nonimmediate_operand" ""))]
+  "TARGET_64BIT || TARGET_SSE"
+{
+  if (TARGET_64BIT)
+    ix86_expand_move (TImode, operands);
+  else if (push_operand (operands[0], TImode))
+    ix86_expand_push (TImode, operands[1]);
+  else
+    ix86_expand_vector_move (TImode, operands);
+  DONE;
+})
+
+;; This expands to what emit_move_complex would generate if we didn't
+;; have a movti pattern.  Having this avoids problems with reload on
+;; 32-bit targets when SSE is present, but doesn't seem to be harmful
+;; to have around all the time.
+(define_expand "movcdi" + [(set (match_operand:CDI 0 "nonimmediate_operand" "") + (match_operand:CDI 1 "general_operand" ""))] + "" +{ + if (push_operand (operands[0], CDImode)) + emit_move_complex_push (CDImode, operands[0], operands[1]); + else + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +(define_expand "mov" + [(set (match_operand:SWI1248x 0 "nonimmediate_operand" "") + (match_operand:SWI1248x 1 "general_operand" ""))] + "" + "ix86_expand_move (mode, operands); DONE;") + +(define_insn "*mov_xor" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (match_operand:SWI48 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{l}\t%k0, %k0" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*mov_or" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (match_operand:SWI48 1 "const_int_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && operands[1] == constm1_rtx" + "or{}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "") + (set_attr "length_immediate" "1")]) + +(define_insn "*movoi_internal_avx" + [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:OI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return "vxorps\t%0, %0, %0"; + case 1: + case 2: + if (misaligned_operand (operands[0], OImode) + || misaligned_operand (operands[1], OImode)) + return "vmovdqu\t{%1, %0|%0, %1}"; + else + return "vmovdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "*movti_internal_rex64" + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. 
*/ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + default: + gcc_unreachable (); + } +} + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "reload_completed + && !SSE_REG_P (operands[0]) && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movti_internal_sse" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 1: + case 2: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0))) + (const_string "V4SF")] + (const_string "TI")))]) + +(define_insn "*movdi_internal_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r ,r,m ,!o,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") + (match_operand:DI 1 "general_operand" + "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSECVT: + if (SSE_REG_P (operands[0])) + return "movq2dq\t{%1, %0|%0, %1}"; + else + return "movdq2q\t{%1, %0|%0, %1}"; + + case TYPE_SSEMOV: + if (get_attr_mode (insn) == MODE_TI) + return "%vmovdqa\t{%1, %0|%0, %1}"; + /* Handle broken assemblers that require movd instead of movq. */ + if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) + return "%vmovd\t{%1, %0|%0, %1}"; + return "%vmovq\t{%1, %0|%0, %1}"; + + case TYPE_MMXMOV: + /* Handle broken assemblers that require movd instead of movq. 
*/
+      if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
+	return "movd\t{%1, %0|%0, %1}";
+      return "movq\t{%1, %0|%0, %1}";
+
+    case TYPE_SSELOG1:
+      return "%vpxor\t%0, %d0";
+
+    case TYPE_MMX:
+      return "pxor\t%0, %0";
+
+    case TYPE_MULTI:
+      return "#";
+
+    case TYPE_LEA:
+      return "lea{q}\t{%a1, %0|%0, %a1}";
+
+    default:
+      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+      if (get_attr_mode (insn) == MODE_SI)
+	return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else if (which_alternative == 2)
+	return "movabs{q}\t{%1, %0|%0, %1}";
+      else
+	return "mov{q}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "5")
+	      (const_string "mmx")
+	    (eq_attr "alternative" "6,7,8,9,10")
+	      (const_string "mmxmov")
+	    (eq_attr "alternative" "11")
+	      (const_string "sselog1")
+	    (eq_attr "alternative" "12,13,14,15,16")
+	      (const_string "ssemov")
+	    (eq_attr "alternative" "17,18")
+	      (const_string "ssecvt")
+	    (eq_attr "alternative" "4")
+	      (const_string "multi")
+	    (match_operand:DI 1 "pic_32bit_operand" "")
+	      (const_string "lea")
+	   ]
+	   (const_string "imov")))
+   (set (attr "modrm")
+     (if_then_else
+       (and (eq_attr "alternative" "2") (eq_attr "type" "imov"))
+	 (const_string "0")
+	 (const_string "*")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "alternative" "2") (eq_attr "type" "imov"))
+	 (const_string "8")
+	 (const_string "*")))
+   (set_attr "prefix_rex" "*,*,*,*,*,*,*,1,*,1,*,*,*,*,*,*,*,*,*")
+   (set_attr "prefix_data16" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,1,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "11,12,13,14,15,16")
+       (const_string "maybe_vex")
+       (const_string "orig")))
+   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
+
+;; Convert impossible stores of immediates into existing instructions.
+;; First try to get a scratch register and go through it.  If that
+;; fails, move by 32-bit parts.
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (set (match_operand:DI 0 "memory_operand" "")
+	(match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode)"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))])
+
+;; We need to define this as both a peephole and a splitter, in case the
+;; peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode) && 1"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		    ?
epilogue_completed : reload_completed) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);") + +(define_insn "*movdi_internal" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r ,o ,*y,m*y,*y,*Y2,m ,*Y2,*Y2,*x,m ,*x,*x") + (match_operand:DI 1 "general_operand" + "riFo,riF,C ,*y ,m ,C ,*Y2,*Y2,m ,C ,*x,*x,m "))] + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + # + # + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + %vpxor\t%0, %d0 + %vmovq\t{%1, %0|%0, %1} + %vmovdqa\t{%1, %0|%0, %1} + %vmovq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8") + (const_string "maybe_vex") + (const_string "orig"))) + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")]) + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0])) + && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movsi_internal" + [(set (match_operand:SI 0 "nonimmediate_operand" + "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") + (match_operand:SI 1 "general_operand" + "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + return "%vxorps\t%0, %d0"; + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_SI: + return "%vmovd\t{%1, %0|%0, %1}"; + case MODE_SF: + return "%vmovss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + case TYPE_MMX: + return "pxor\t%0, %0"; + + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) + return "movq\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_LEA: + return "lea{l}\t{%a1, %0|%0, %a1}"; + + default: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + return "mov{l}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "mmx") + (eq_attr "alternative" "3,4,5") + (const_string "mmxmov") + (eq_attr "alternative" "6") + (const_string "sselog1") + (eq_attr "alternative" "7,8,9,10,11") + (const_string "ssemov") + (match_operand:DI 1 "pic_32bit_operand" "") + (const_string "lea") + ] + (const_string "imov"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4,5") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "prefix_data16") + (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI")) + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (const_string "DI") + (eq_attr "alternative" "6,7") + (if_then_else + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (and (eq_attr "alternative" "8,9,10,11") + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (const_string 
"SF") + ] + (const_string "SI")))]) + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "imov") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "SI") + ] + (const_string "HI")))]) + +;; Situation is quite tricky about when to choose full sized (SImode) move +;; over QImode moves. For Q_REG -> Q_REG move we use full size only for +;; partial register dependency machines (such as AMD Athlon), where QImode +;; moves issue extra dependency and for partial register stalls machines +;; that don't use QImode patterns (and QImode move cause stall on the next +;; instruction). +;; +;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial +;; register stall machines with, where we use QImode instructions, since +;; partial register stall can be caused there. Then we use movzx. 
+(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") + (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])); + return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "5") + (not (match_operand:QI 1 "aligned_operand" ""))) + (const_string "imovx") + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "3") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (eq_attr "alternative" "3,5") + (const_string "imovx") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,5") + (const_string "SI") + (eq_attr "alternative" "6") + (const_string "QI") + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (and (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)))))) + (const_string "SI") + ;; Avoid partial register stalls when not using QImode arithmetic + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0))))) + (const_string "SI") + ] + (const_string "QI")))]) + +;; Stores and loads of ax to arbitrary constant address. 
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabs<mode>_1"
+  [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+	(match_operand:SWI1248x 1 "nonmemory_operand" "a,er"))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+  "@
+   movabs{<imodesuffix>}\t{%1, %P0|[%P0], %1}
+   mov{<imodesuffix>}\t{%1, %a0|%a0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*movabs<mode>_2"
+  [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
+	(mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+  "@
+   movabs{<imodesuffix>}\t{%P1, %0|%0, [%P1]}
+   mov{<imodesuffix>}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0")
+   (set_attr "memory" "load")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*swap<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "+r")
+	(match_operand:SWI48 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  ""
+  "xchg{<imodesuffix>}\t%1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "<MODE>")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "double")])
+
+(define_insn "*swap<mode>_1"
+  [(set (match_operand:SWI12 0 "register_operand" "+r")
+	(match_operand:SWI12 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "xchg{l}\t%k1, %k0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "double")])
+
+;; amdfam10_decode is not added here since TARGET_PARTIAL_REG_STALL
+;; is disabled for AMDFAM10.
+(define_insn "*swap<mode>_2"
+  [(set (match_operand:SWI12 0 "register_operand" "+<r>")
+	(match_operand:SWI12 1 "register_operand" "+<r>"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_PARTIAL_REG_STALL"
+  "xchg{<imodesuffix>}\t%1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "<MODE>")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstrict<mode>"
+  [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand" ""))
+	(match_operand:SWI12 1 "general_operand" ""))]
+  ""
+{
+  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+    FAIL;
+  if (GET_CODE (operands[0]) == SUBREG
+      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[0]))) != MODE_INT)
+    FAIL;
+  /* Don't generate memory->memory moves, go through a register.  */
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "*movstrict<mode>_1"
+  [(set (strict_low_part
+	  (match_operand:SWI12 0 "nonimmediate_operand" "+<r>m,<r>"))
+	(match_operand:SWI12 1 "general_operand" "<r>n,m"))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*movstrict<mode>_xor"
+  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
+	(match_operand:SWI12 1 "const0_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  "xor{<imodesuffix>}\t%0, %0"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*mov<mode>_extv_1"
+
[(set (match_operand:SWI24 0 "register_operand" "=R") + (sign_extract:SWI24 (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extv_1_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extv_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*mov_extzv_1" + [(set (match_operand:SWI48 0 "register_operand" "=R") + (zero_extract:SWI48 (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movz{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extzv_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extzv_2" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R") + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_expand "mov_insv_1" + [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + 
(match_operand:SWI48 1 "nonmemory_operand" ""))]) + +(define_insn "*mov_insv_1_rex64" + [(set (zero_extract:SWI48x (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SWI48x 1 "nonmemory_operand" "Qn"))] + "TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movsi_insv_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SI 1 "general_operand" "Qmn"))] + "!TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movqi_insv_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)))] + "" + "mov{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +;; Floating point push instructions. + +(define_insn "*pushtf" + [(set (match_operand:TF 0 "push_operand" "=<,<,<") + (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] + "TARGET_SSE2" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "sse,*,*") + (set_attr "mode" "TF,SI,SI")]) + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "sse_reg_operand" ""))] + "TARGET_SSE2 && reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) + (set (mem:TF (reg:P SP_REG)) (match_dup 1))]) + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "TARGET_SSE2 && reload_completed + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushxf" + [(set (match_operand:XF 0 "push_operand" "=<,<") + (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] + "optimize_function_for_speed_p (cfun)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*") + (set_attr "mode" "XF,SI")]) + +;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushxf using integer instructions is 3+3*memory operand size +;; Pushing using integer instructions is longer except for constants +;; and direct memory references (assuming that any given constant is pushed +;; only once, but this ought to be handled elsewhere). + +(define_insn "*pushxf_nointeger" + [(set (match_operand:XF 0 "push_operand" "=X,X,X") + (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] + "optimize_function_for_size_p (cfun)" +{ + /* This insn should be already split before reg-stack. 
*/ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "XF,SI,SI")]) + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:XF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "reload_completed + && !FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushdf" + [(set (match_operand:DF 0 "push_operand" "=<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] + "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "DF,SI,DF")]) + +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer instructions is 2+2*memory operand size +;; On the average, pushdf using integers can be still shorter. Allow this +;; pattern for optimize_size too. + +(define_insn "*pushdf_nointeger" + [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] + "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*,*") + (set_attr "mode" "DF,SI,SI,DF")]) + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "any_fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) + (set (mem:DF (reg:P SP_REG)) (match_dup 1))]) + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed + && !ANY_FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushsf_rex64" + [(set (match_operand:SF 0 "push_operand" "=X,X,X") + (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] + "TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{q}\t%q1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,DI,SF")]) + +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))] + "!TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{l}\t%1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,SI,SF")]) + +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" ""))] + "reload_completed + && MEM_P (operands[1]) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) + (match_dup 2))]) + +;; %%% Kill this when call knows how to work this out. 
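+;; (Sketch of the split below: after reload, a push of an x87 SFmode
+;; register becomes an explicit stack adjustment plus a store,
+;; conceptually
+;;   subl $4, %esp
+;;   fstps (%esp)
+;; on a 32-bit target; the exact store is chosen later by reg-stack.)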
+(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "any_fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:SF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (-GET_MODE_SIZE (mode));") + +;; Floating point move instructions. + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_expand "mov" + [(set (match_operand:X87MODEF 0 "nonimmediate_operand" "") + (match_operand:X87MODEF 1 "general_operand" ""))] + "" + "ix86_expand_move (mode, operands); DONE;") + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + "TARGET_SSE2 + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "ssemov,ssemov,sselog1,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,2") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed + && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movxf_internal" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] + "optimize_function_for_speed_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: case 4: + return "#"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +;; Do not use integer registers when optimizing for size +(define_insn "*movxf_internal_nointeger" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] + "optimize_function_for_size_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || standard_80387_constant_p (operands[1]) > 0 + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return 
output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_split + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "reload_completed + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && ! (FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && FP_REG_P (SUBREG_REG (operands[0])))) + && ! (FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movdf_internal_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,m,!r,!o,Y2*x,Y2*x,Y2*x,m ,Yi,r ") + (match_operand:DF 1 "general_operand" + "fm,f,G,rm,r,F ,F ,C ,Y2*x,m ,Y2*x,r ,Yi"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1]) > 0) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "mov{q}\t{%1, %0|%0, %1}"; + + case 5: + return "movabs{q}\t{%1, %0|%0, %1}"; + + case 6: + return "#"; + + case 7: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vxorpd\t%0, %d0"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } + case 8: + case 9: + case 10: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "%vmovlpd\t{%1, %d0|%d0, %1}"; + case MODE_V2SF: + return "%vmovlps\t{%1, %d0|%d0, %1}"; + default: + gcc_unreachable (); + } + + case 11: + case 12: + /* Handle broken assemblers that require movd instead of movq. 
*/ + return "%vmovd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable(); + } +} + [(set_attr "type" "fmov,fmov,fmov,imov,imov,imov,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov") + (set (attr "modrm") + (if_then_else + (and (eq_attr "alternative" "5") (eq_attr "type" "imov")) + (const_string "0") + (const_string "*"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "alternative" "5") (eq_attr "type" "imov")) + (const_string "8") + (const_string "*"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4,5,6") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "V1DF") + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4,5,6,11,12") + (const_string "DI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "7,8") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "7") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "8") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. 
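+	    (E.g. on such targets the V1DF load form "movlpd (%rax), %xmm0"
+	    is preferred: it writes only the low half and skips the zeroing
+	    of the upper half that movsd would perform.)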
*/ + (eq_attr "alternative" "9") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_insn "*movdf_internal" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))] + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && optimize_function_for_speed_p (cfun) + && TARGET_INTEGER_DFMODE_MOVES + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vxorpd\t%0, %d0"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlpd\t{%1, %0|%0, %1}"; + } + else + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlps\t{%1, %0|%0, %1}"; + } + else + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "V1DF") + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. 
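+	    (xorps lacks the 0x66 operand-size prefix that the xorpd and
+	    pxor encodings carry.)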
*/ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +;; Moving is usually shorter when only FP registers are used. This separate +;; movdf pattern avoids the use of integer registers for FP operations +;; when optimizing for size. + +(define_insn "*movdf_internal_nointeger" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x"))] + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (optimize_function_for_size_p (cfun) + || !TARGET_INTEGER_DFMODE_MOVES) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && !memory_operand (operands[0], DFmode) + && standard_80387_constant_p (operands[1]) > 0) + || GET_CODE (operands[1]) != CONST_DOUBLE + || ((optimize_function_for_size_p (cfun) + || !TARGET_MEMORY_MISMATCH_STALL) + && memory_operand (operands[0], DFmode)))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vxorpd\t%0, %d0"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlpd\t{%1, %0|%0, %1}"; + } + else + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + if (TARGET_AVX) + { + if (REG_P 
(operands[0])) + return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlps\t{%1, %0|%0, %1}"; + } + else + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "V1DF") + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_split + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! 
(ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") + (match_operand:SF 1 "general_operand" + "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1]) > 0) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], SFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "mov{l}\t{%1, %0|%0, %1}"; + case 5: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + else + return "%vxorps\t%0, %d0"; + case 6: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovss\t{%1, %d0|%d0, %1}"; + case 7: + if (TARGET_AVX) + return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}" + : "vmovss\t{%1, %0|%0, %1}"; + else + return "movss\t{%1, %0|%0, %1}"; + case 8: + return "%vmovss\t{%1, %0|%0, %1}"; + + case 9: case 10: case 14: case 15: + return "movd\t{%1, %0|%0, %1}"; + + case 11: + return "movq\t{%1, %0|%0, %1}"; + + case 12: case 13: + return "%vmovd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. 
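+	    (Hence alternative 6 below prefers "movaps %xmm1, %xmm0" over
+	    "movss %xmm1, %xmm0" for register-to-register SF moves on such
+	    targets.)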
*/ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "memory_operand" ""))] + "reload_completed + && MEM_P (operands[1]) + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode + || GET_MODE (operands[0]) == SFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] +{ + rtx c = operands[2]; + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (standard_80387_constant_p (c) < 1) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (float_extend (match_operand 1 "memory_operand" "")))] + "reload_completed + && MEM_P (operands[1]) + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode + || GET_MODE (operands[0]) == SFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] +{ + rtx c = operands[2]; + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (standard_80387_constant_p (c) < 1) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; +}) + +;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (match_operand:X87MODEF 1 "immediate_operand" ""))] + "reload_completed && FP_REGNO_P (REGNO (operands[0])) + && (standard_80387_constant_p (operands[1]) == 8 + || standard_80387_constant_p (operands[1]) == 9)" + [(set (match_dup 0)(match_dup 1)) + (set (match_dup 0) + (neg:X87MODEF (match_dup 0)))] +{ + REAL_VALUE_TYPE r; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + if (real_isnegzero (&r)) + operands[1] = CONST0_RTX (mode); + else + operands[1] = CONST1_RTX (mode); +}) + +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) + +(define_insn "*swap" + [(set (match_operand:MODEF 0 "fp_register_operand" "+f") + (match_operand:MODEF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387 || reload_completed" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "")]) + +;; Zero extension instructions + +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] + "" +{ + if (!TARGET_64BIT) + { + emit_insn (gen_zero_extendsidi2_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "*zero_extendsidi2_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2") + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] + "TARGET_64BIT" + "@ + 
mov{l}\t{%1, %k0|%k0, %1} + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex") + (set_attr "prefix_0f" "0,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,TI,TI")]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (zero_extend:DI (match_dup 0)))] + "TARGET_64BIT" + [(set (match_dup 4) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +;; %%% Kill me once multi-word ops are sane. +(define_insn "zero_extendsidi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2") + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "@ + # + # + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1}" + [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex") + (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 4) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (zero_extend:DI (match_operand:SI 1 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed + && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +(define_insn "zero_extenddi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (match_operand:SWI12 1 "nonimmediate_operand" "m")))] + "TARGET_64BIT" + "movz{l|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "" +{ + if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + { + operands[1] = force_reg (HImode, operands[1]); + emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); + DONE; + } +}) + +(define_insn_and_split "zero_extendhisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_insn "*zero_extendhisi2_movzwl" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "!TARGET_ZERO_EXTEND_WITH_AND + || optimize_function_for_size_p (cfun)" + "movz{wl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_expand "zero_extendqi2" + [(parallel + [(set (match_operand:SWI24 0 "register_operand" "") + (zero_extend:SWI24 (match_operand:QI 1 
"nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_insn "*zero_extendqi2_and" + [(set (match_operand:SWI24 0 "register_operand" "=r,?&q") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "")]) + +;; When source and destination does not overlap, clear destination +;; first and then do the movb +(define_split + [(set (match_operand:SWI24 0 "register_operand" "") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + && ANY_QI_REG_P (operands[0]) + && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (strict_low_part (match_dup 2)) (match_dup 1))] +{ + operands[2] = gen_lowpart (QImode, operands[0]); + ix86_expand_clear (operands[0]); +}) + +(define_insn "*zero_extendqi2_movzbl_and" + [(set (match_operand:SWI24 0 "register_operand" "=r,r") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "")]) + +;; For the movzbl case strip only the clobber +(define_split + [(set (match_operand:SWI24 0 "register_operand" "") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_dup 0) + (zero_extend:SWI24 (match_dup 1)))]) + +; zero extend to SImode to avoid partial register stalls +(define_insn "*zero_extendqi2_movzbl" + [(set (match_operand:SWI24 0 "register_operand" "=r") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))" + "movz{bl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; Rest is handled by single and. 
+(define_split
+  [(set (match_operand:SWI24 0 "register_operand" "")
+	(zero_extend:SWI24 (match_operand:QI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) == true_regnum (operands[1])"
+  [(parallel [(set (match_dup 0) (and:SWI24 (match_dup 0) (const_int 255)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+;; Sign extension instructions
+
+(define_expand "extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))]
+  ""
+{
+  if (!TARGET_64BIT)
+    {
+      emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "*extendsidi2_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=*a,r")
+	(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
+  "TARGET_64BIT"
+  "@
+   {cltq|cdqe}
+   movs{lq|x}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")
+   (set_attr "prefix_0f" "0")
+   (set_attr "modrm" "0,1")])
+
+(define_insn "extendsidi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
+  "!TARGET_64BIT"
+  "#")
+
+;; Extend to memory case when source register does die.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_operand:SI 2 "register_operand" ""))]
+  "(reload_completed
+    && dead_or_set_p (insn, operands[1])
+    && !reg_mentioned_p (operands[1], operands[0]))"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 4) (match_dup 1))]
+  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+;; Extend to memory case when source register does not die.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_operand:SI 2 "register_operand" ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
+
+  emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable.  */
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+      && true_regnum (operands[1]) == AX_REG
+      && true_regnum (operands[2]) == DX_REG)
+    {
+      emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
+    }
+  else
+    {
+      emit_move_insn (operands[2], operands[1]);
+      emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
+    }
+  emit_move_insn (operands[4], operands[2]);
+  DONE;
+})
+
+;; Extend to register case.  Optimize case where source and destination
+;; registers match and cases where we can use cltd.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2 ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
+
+  if (true_regnum (operands[3]) != true_regnum (operands[1]))
+    emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable.
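+     For example (illustration only), when the low half is in %eax and the
+     high half lands in %edx, this emits the one-byte
+       cltd
+     instead of the general two-insn form
+       movl %eax, %edx
+       sarl $31, %edx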
*/ + if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + && true_regnum (operands[3]) == AX_REG + && true_regnum (operands[4]) == DX_REG) + { + emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31))); + DONE; + } + + if (true_regnum (operands[4]) != true_regnum (operands[1])) + emit_move_insn (operands[4], operands[1]); + + emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31))); + DONE; +}) + +(define_insn "extenddi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (match_operand:SWI12 1 "nonimmediate_operand" "m")))] + "TARGET_64BIT" + "movs{q|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=*a,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))] + "" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1, %0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "*extendhisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=*a,r") + (zero_extend:DI + (sign_extend:SI + (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))] + "TARGET_64BIT" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1, %k0|%k0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "" + "movs{bl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*extendqisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))] + "TARGET_64BIT" + "movs{bl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=*a,r") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))] + "" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cbtw|cbw}"; + default: + return "movs{bw|x}\t{%1, %0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "HI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +;; Conversions between float and double. + +;; These are all no-ops in the model used for the 80387. +;; So just emit moves. + +;; %%% Kill these when call knows how to work out a DFmode push earlier. 
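+;;
+;; Editorial illustration (hypothetical source, not upstream): a DFmode
+;; push of an extended SFmode value typically comes from 32-bit argument
+;; passing, e.g.
+;;
+;;   void g (int, ...);
+;;   void f (float x) { g (1, (double) x); }
+;;
+;; The splits below rewrite such a push as an explicit stack-pointer
+;; adjustment followed by a store of the extended value.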
+(define_split
+  [(set (match_operand:DF 0 "push_operand" "")
+	(float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+	(float_extend:XF (match_operand:MODEF 1 "fp_register_operand" "")))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+  "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
+
+(define_expand "extendsfdf2"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+	(float_extend:DF (match_operand:SF 1 "general_operand" "")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  /* ??? Needed for compress_float_constant since all fp constants
+     are LEGITIMATE_CONSTANT_P.  */
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+	  && standard_80387_constant_p (operands[1]) > 0)
+	{
+	  operands[1] = simplify_const_unary_operation
+	    (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+	  emit_move_insn_1 (operands[0], operands[1]);
+	  DONE;
+	}
+      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+    }
+})
+
+/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
+   cvtss2sd:
+   unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+   cvtps2pd xmm2,xmm1
+   We do the conversion post reload to avoid producing 128-bit spills
+   that might lead to an ICE on a 32-bit target.  The sequence is
+   unlikely to combine anyway.  */
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+	(float_extend:DF
+	  (match_operand:SF 1 "nonimmediate_operand" "")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 2)
+	(float_extend:V2DF
+	  (vec_select:V2SF
+	    (match_dup 3)
+	    (parallel [(const_int 0) (const_int 1)]))))]
+{
+  operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+  operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0);
+  /* Use movss for loading from memory, unpcklps reg, reg for registers.
+     Try to avoid move when unpacking can be done in source.  */
+  if (REG_P (operands[1]))
+    {
+      /* If it is unsafe to overwrite upper half of source, we need
+	 to move to destination and unpack there.
*/ + if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) + && true_regnum (operands[0]) != true_regnum (operands[1])) + { + rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0])); + emit_move_insn (tmp, operands[1]); + } + else + operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3], + operands[3])); + } + else + emit_insn (gen_vec_setv4sf_0 (operands[3], + CONST0_RTX (V4SFmode), operands[1])); +}) + +(define_insn "*extendsfdf2_mixed" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") + (float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,ssecvt") + (set_attr "prefix" "orig,orig,maybe_vex") + (set_attr "mode" "SF,XF,DF")]) + +(define_insn "*extendsfdf2_sse" + [(set (match_operand:DF 0 "nonimmediate_operand" "=x") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "%vcvtss2sd\t{%1, %d0|%d0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "DF")]) + +(define_insn "*extendsfdf2_i387" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "SF,XF")]) + +(define_expand "extendxf2" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))] + "TARGET_80387" +{ + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + if (standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, XFmode, operands[1], mode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (mode, operands[1])); + } +}) + +(define_insn "*extendxf2_i387" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") + (float_extend:XF + (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" ",XF")]) + +;; %%% This seems bad bad news. +;; This cannot output into an f-reg because there is no way to be sure +;; of truncating in that case. Otherwise this is just like a simple move +;; insn. So we pretend we can output to a reg in order to get better +;; register preferencing, but we really use a stack slot. + +;; Conversion from DFmode to SFmode. + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) + ; + else if (flag_unsafe_math_optimizations) + ; + else + { + enum ix86_stack_slot slot = (virtuals_instantiated + ? 
SLOT_TEMP
+				   : SLOT_VIRTUAL);
+      rtx temp = assign_386_stack_local (SFmode, slot);
+      emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
+      DONE;
+    }
+})
+
+/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
+   cvtsd2ss:
+   unpcklpd xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+   cvtpd2ps xmm2,xmm1
+   We do the conversion post reload to avoid producing 128-bit spills
+   that might lead to an ICE on a 32-bit target.  The sequence is
+   unlikely to combine anyway.  */
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+	(float_truncate:SF
+	  (match_operand:DF 1 "nonimmediate_operand" "")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 2)
+	(vec_concat:V4SF
+	  (float_truncate:V2SF
+	    (match_dup 4))
+	  (match_dup 3)))]
+{
+  operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+  operands[3] = CONST0_RTX (V2SFmode);
+  operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0);
+  /* Use movsd for loading from memory, unpcklpd for registers.
+     Try to avoid move when unpacking can be done in source, or SSE3
+     movddup is available.  */
+  if (REG_P (operands[1]))
+    {
+      if (!TARGET_SSE3
+	  && true_regnum (operands[0]) != true_regnum (operands[1])
+	  && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+	      || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+	{
+	  rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0);
+	  emit_move_insn (tmp, operands[1]);
+	  operands[1] = tmp;
+	}
+      else if (!TARGET_SSE3)
+	operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+      emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
+    }
+  else
+    emit_insn (gen_sse2_loadlpd (operands[4],
+				 CONST0_RTX (V2DFmode), operands[1]));
+})
+
+(define_expand "truncdfsf2_with_temp"
+  [(parallel [(set (match_operand:SF 0 "" "")
+		   (float_truncate:SF (match_operand:DF 1 "" "")))
+	      (clobber (match_operand:SF 2 "" ""))])])
+
+(define_insn "*truncdfsf_fast_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x")
+	(float_truncate:SF
+	  (match_operand:DF 1 "nonimmediate_operand" "f ,xm")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,ssecvt")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "SF")])
+
+;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
+;; because nothing we do here is unsafe.
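+;;
+;; Editorial note: narrowing a double to float is a correctly rounded
+;; IEEE operation, so a plain cast like the hypothetical
+;;
+;;   float narrow (double d) { return (float) d; }
+;;
+;; can be emitted as a single cvtsd2ss without any -ffast-math guard.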
+(define_insn "*truncdfsf_fast_sse" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "%vcvtsd2ss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf_fast_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=fm") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f")))] + "TARGET_80387 && flag_unsafe_math_optimizations" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf_mixed" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,Y2 ,?f,?x,?*r") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f ,Y2m,f ,f ,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))] + "TARGET_MIX_SSE_I387" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + case 1: + return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; + + default: + return "#"; + } +} + [(set_attr "type" "fmov,ssecvt,multi,multi,multi") + (set_attr "unit" "*,*,i387,i387,i387") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] + "TARGET_80387" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + default: + return "#"; + } +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf2_i387_1" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "f")))] + "TARGET_80387 + && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !TARGET_MIX_SSE_I387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "fp_register_operand" ""))) + (clobber (match_operand 2 "" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));") + +;; Conversion from XFmode to {SF,DF}mode + +(define_expand "truncxf2" + [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_dup 2))])] + "TARGET_80387" +{ + if (flag_unsafe_math_optimizations) + { + rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (mode); + emit_insn (gen_truncxf2_i387_noop (reg, operands[1])); + if (reg != operands[0]) + emit_move_insn (operands[0], reg); + DONE; + } + else + { + enum ix86_stack_slot slot = (virtuals_instantiated + ? 
SLOT_TEMP + : SLOT_VIRTUAL); + operands[2] = assign_386_stack_local (mode, slot); + } +}) + +(define_insn "*truncxfsf2_mixed" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r") + (float_truncate:SF + (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] + "TARGET_80387" +{ + gcc_assert (!which_alternative); + return output_387_reg_move (insn, operands); +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") + (set_attr "mode" "SF")]) + +(define_insn "*truncxfdf2_mixed" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?Y2,?*r") + (float_truncate:DF + (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))] + "TARGET_80387" +{ + gcc_assert (!which_alternative); + return output_387_reg_move (insn, operands); +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") + (set_attr "mode" "DF")]) + +(define_insn "truncxf2_i387_noop" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387 && flag_unsafe_math_optimizations" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "")]) + +(define_insn "*truncxf2_i387" + [(set (match_operand:MODEF 0 "memory_operand" "=m") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] + "TARGET_80387 && reload_completed" + [(set (match_dup 2) (float_truncate:MODEF (match_dup 1))) + (set (match_dup 0) (match_dup 2))]) + +(define_split + [(set (match_operand:MODEF 0 "memory_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] + "TARGET_80387" + [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]) + +;; Signed conversion to DImode. + +(define_expand "fix_truncxfdi2" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "fix_truncdi2" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (mode))" +{ + if (TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (TARGET_64BIT && SSE_FLOAT_MODE_P (mode)) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); + emit_insn (gen_fix_truncdi_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) + +;; Signed conversion to SImode. 
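+;;
+;; Editorial sketch (hypothetical source): these expanders implement
+;; casts such as
+;;
+;;   int trunc_cast (double d) { return (int) d; }
+;;
+;; preferring SSE cvttsd2si when the value lives in an xmm register,
+;; fisttp on SSE3-capable x87, and otherwise the control-word
+;; save/set/restore sequences defined further below.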
+ +(define_expand "fix_truncxfsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "fix_truncsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || SSE_FLOAT_MODE_P (mode)" +{ + if (TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (SSE_FLOAT_MODE_P (mode)) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + emit_insn (gen_fix_truncsi_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) + +;; Signed conversion to HImode. + +(define_expand "fix_trunchi2" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:X87MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 + && !(SSE_FLOAT_MODE_P (mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +;; Unsigned conversion to SImode. + +(define_expand "fixuns_truncsi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (unsigned_fix:SI + (match_operand:MODEF 1 "nonimmediate_operand" ""))) + (use (match_dup 2)) + (clobber (match_scratch: 3 "")) + (clobber (match_scratch: 4 ""))])] + "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" +{ + enum machine_mode mode = mode; + enum machine_mode vecmode = mode; + REAL_VALUE_TYPE TWO31r; + rtx two31; + + if (optimize_insn_for_size_p ()) + FAIL; + + real_ldexp (&TWO31r, &dconst1, 31); + two31 = const_double_from_real_value (TWO31r, mode); + two31 = ix86_build_const_vector (vecmode, true, two31); + operands[2] = force_reg (vecmode, two31); +}) + +(define_insn_and_split "*fixuns_trunc_1" + [(set (match_operand:SI 0 "register_operand" "=&x,&x") + (unsigned_fix:SI + (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm"))) + (use (match_operand: 4 "nonimmediate_operand" "m,x")) + (clobber (match_scratch: 1 "=x,&x")) + (clobber (match_scratch: 2 "=x,x"))] + "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_convert_uns_si_sse (operands); + DONE; +}) + +;; Unsigned conversion to HImode. +;; Without these patterns, we'll try the unsigned SI conversion which +;; is complex for SSE, rather than the signed SI conversion, which isn't. + +(define_expand "fixuns_trunchi2" + [(set (match_dup 2) + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" ""))) + (set (match_operand:HI 0 "nonimmediate_operand" "") + (subreg:HI (match_dup 2) 0))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "operands[2] = gen_reg_rtx (SImode);") + +;; When SSE is available, it is always faster to use it! 
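+;;
+;; Editorial illustration: with SSE math the truncating conversion is a
+;; single instruction, roughly
+;;
+;;   cvttsd2si %xmm0, %eax   ; DFmode source
+;;   cvttss2si %xmm0, %eax   ; SFmode source
+;;
+;; which is what the two patterns below emit (with a REX.W prefix for
+;; the DImode variants).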
+(define_insn "fix_truncdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "TARGET_64BIT && SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "%vcvtts2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "prefix_rex" "1") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double") + (set_attr "bdver1_decode" "double,double")]) + +(define_insn "fix_truncsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "%vcvtts2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double") + (set_attr "bdver1_decode" "double,double")]) + +;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. +(define_peephole2 + [(set (match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "memory_operand" "")) + (set (match_operand:SSEMODEI24 2 "register_operand" "") + (fix:SSEMODEI24 (match_dup 0)))] + "TARGET_SHORTEN_X87_SSE + && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()) + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]) + +;; Avoid vector decoded forms of the instruction. +(define_peephole2 + [(match_scratch:DF 2 "Y2") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] + "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]) + +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] + "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]) + +(define_insn_and_split "fix_trunc_fisttp_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" "")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH) + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc_i387_fisttp (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_fisttp_with_temp (operands[0], + operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp" + [(set (match_operand:X87MODEI 0 "memory_operand" "=m") + (fix:X87MODEI (match_operand 1 "register_operand" "f"))) + (clobber (match_scratch:XF 2 "=&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "* return output_fix_trunc (insn, operands, 1);" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp_with_temp" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + 
(fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "#" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI 0 "register_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))]) + +(define_split + [(set (match_operand:X87MODEI 0 "memory_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))])]) + +;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description +;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control +;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG +;; clobbering insns can be used. Look at emit_i387_cw_initialization () +;; function in i386.c. +(define_insn_and_split "*fix_trunc_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_insn "fix_truncdi_i387" + [(set (match_operand:DI 0 "memory_operand" "=m") + (fix:DI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) + +(define_insn "fix_truncdi_i387_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (fix:DI (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "#" + [(set_attr "type" "fistp") + (set_attr 
"i387_cw" "trunc") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])]) + +(define_insn "fix_trunc_i387" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))]) + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))])]) + +(define_insn "x86_fnstcw_1" + [(set (match_operand:HI 0 "memory_operand" "=m") + (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))] + "TARGET_80387" + "fnstcw\t%0" + [(set (attr "length") + (symbol_ref "ix86_attr_length_address_default (insn) + 2")) + (set_attr "mode" "HI") + (set_attr "unit" "i387") + (set_attr "bdver1_decode" "vector")]) + +(define_insn "x86_fldcw_1" + [(set (reg:HI FPCR_REG) + (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] + "TARGET_80387" + "fldcw\t%0" + [(set (attr "length") + (symbol_ref "ix86_attr_length_address_default (insn) + 2")) + (set_attr "mode" "HI") + 
(set_attr "unit" "i387") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) + +;; Conversion between fixed point and floating point. + +;; Even though we only accept memory inputs, the backend _really_ +;; wants to be able to do this between registers. + +(define_expand "floathi2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)") + +;; Pre-reload splitter to add memory clobber to the pattern. +(define_insn_and_split "*floathi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] + "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);") + +(define_insn "*floathi2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:HI 2 "memory_operand" "=m,m"))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "#" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floathi2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "fild%Z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "memory_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) + +(define_expand "float2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))] + "TARGET_80387 + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + if (!((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && !X87_ENABLE_FLOAT (mode, mode)) + { + rtx reg = gen_reg_rtx (XFmode); + rtx insn; + + emit_insn (gen_floatxf2 (reg, operands[1])); + + if (mode == SFmode) + insn = gen_truncxfsf2 (operands[0], reg); + else if (mode == DFmode) + insn = gen_truncxfdf2 (operands[0], reg); + else + gcc_unreachable (); + + emit_insn (insn); + DONE; + } +}) + +;; Pre-reload splitter to add memory clobber to the pattern. 
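+;;
+;; Editorial note: the clobber reserves a stack slot because fild can
+;; only load its operand from memory; an int-to-float conversion such as
+;; the hypothetical
+;;
+;;   double itof (int i) { return i; }
+;;
+;; must first spill i to that slot whenever the value arrives in a
+;; general register and the i387 performs the conversion.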
+(define_insn_and_split "*float2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))] + "((TARGET_80387 + && X87_ENABLE_FLOAT (mode, mode) + && (!((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)) + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode == SImode + && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS + && optimize_function_for_speed_p (cfun) + && flag_trapping_math) + || !(TARGET_INTER_UNIT_CONVERSIONS + || optimize_function_for_size_p (cfun))))) + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] +{ + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + + /* Avoid store forwarding (partial memory) stall penalty + by passing DImode value through XMM registers. */ + if (mode == DImode && !TARGET_64BIT + && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && optimize_function_for_speed_p (cfun)) + { + emit_insn (gen_floatdi2_i387_with_xmm (operands[0], + operands[1], + operands[2])); + DONE; + } +}) + +(define_insn "*floatsi2_vector_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt") + (set_attr "mode" ",,,,") + (set_attr "unit" "*,i387,*,*,*") + (set_attr "athlon_decode" "*,*,double,direct,double") + (set_attr "amdfam10_decode" "*,*,vector,double,double") + (set_attr "bdver1_decode" "*,*,double,direct,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_vector_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "@ + fild%Z1\t%1 + #" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" ",") + (set_attr "unit" "i387,*") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") + (set_attr "bdver1_decode" "*,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387" + "#" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") + (set_attr "mode" "") + (set_attr "unit" "*,i387,*,*") + (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "amdfam10_decode" "*,*,vector,double") + (set_attr "bdver1_decode" "*,*,double,direct") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && TARGET_INTER_UNIT_CONVERSIONS + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(set 
(match_dup 0) (float:MODEF (match_dup 1)))]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))]) + +(define_insn "*float2_mixed_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "@ + fild%Z1\t%1 + %vcvtsi2s\t{%1, %d0|%d0, %1} + %vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "fmov,sseicvt,sseicvt") + (set_attr "prefix" "orig,maybe_vex,maybe_vex") + (set_attr "mode" "") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "prefix" "maybe_vex") + (ne (symbol_ref "mode == DImode") (const_int 0))) + (const_string "1") + (const_string "*"))) + (set_attr "unit" "i387,*,*") + (set_attr "athlon_decode" "*,double,direct") + (set_attr "amdfam10_decode" "*,vector,double") + (set_attr "bdver1_decode" "*,double,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_mixed_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "@ + fild%Z1\t%1 + %vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "prefix" "maybe_vex") + (ne (symbol_ref "mode == DImode") (const_int 0))) + (const_string "1") + (const_string "*"))) + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") + (set_attr "bdver1_decode" "*,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_vector_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" ",,") + (set_attr "athlon_decode" "double,direct,double") + (set_attr "amdfam10_decode" "vector,double,double") + (set_attr "bdver1_decode" "double,direct,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_vector_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF 
(match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(const_int 0)] +{ + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. */ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[2])); + } + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(const_int 0)] +{ + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1)) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + if (TARGET_INTER_UNIT_MOVES) + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + else + { + operands[5] = ix86_force_to_memory (GET_MODE (operands[1]), + operands[1]); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[5])); + ix86_free_from_memory (GET_MODE (operands[1])); + } + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. 
*/ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + gcc_unreachable (); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_insn "*float2_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "bdver1_decode" "double,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_sse_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "%vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "prefix" "maybe_vex") + (ne (symbol_ref "mode == DImode") (const_int 0))) + (const_string "1") + (const_string "*"))) + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "bdver1_decode" "double,direct") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))]) + +(define_insn "*float2_sse_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "%vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "prefix" "maybe_vex") + (ne (symbol_ref "mode == DImode") (const_int 0))) + (const_string "1") + (const_string "*"))) + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr 
"bdver1_decode" "direct") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0]))))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))]) + +(define_insn "*float2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))] + "TARGET_80387 + && X87_ENABLE_FLOAT (mode, mode)" + "@ + fild%Z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "TARGET_80387 + && X87_ENABLE_FLOAT (mode, mode)" + "fild%Z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && X87_ENABLE_FLOAT (mode, mode) + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && X87_ENABLE_FLOAT (mode, mode) + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) + +;; Avoid store forwarding (partial memory) stall penalty +;; by passing DImode value through XMM registers. 
+
+(define_insn "floatdi2_i387_with_xmm"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+	(float:X87MODEF
+	  (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+   (clobber (match_scratch:V4SI 3 "=X,x"))
+   (clobber (match_scratch:V4SI 4 "=X,x"))
+   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode)
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+   (clobber (match_scratch:V4SI 3 ""))
+   (clobber (match_scratch:V4SI 4 ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode)
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+     Assemble the 64-bit DImode value in an xmm register.  */
+  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+			      gen_rtx_SUBREG (SImode, operands[1], 0)));
+  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+			      gen_rtx_SUBREG (SImode, operands[1], 4)));
+  emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
+					 operands[4]));
+
+  operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+   (clobber (match_scratch:V4SI 3 ""))
+   (clobber (match_scratch:V4SI 4 ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode)
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+
+;; Avoid store forwarding (partial memory) stall penalty by extending
+;; SImode value to DImode through XMM register instead of pushing two
+;; SImode values to stack.  Note that even !TARGET_INTER_UNIT_MOVES
+;; targets benefit from this optimization.  Also note that fild
+;; loads from memory only.
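+;;
+;; Editorial illustration (hypothetical source): the pattern below covers
+;;
+;;   double utof (unsigned int u) { return u; }
+;;
+;; by zero extending u into a 64-bit memory slot and applying the signed
+;; fild to it; every 32-bit unsigned value is a non-negative DImode
+;; integer, so no correction step is needed afterwards.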
+ +(define_insn "*floatunssi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" "x,m"))) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:SI 3 "=X,x"))] + "!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) + && TARGET_SSE" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) + && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] + "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) + && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] +{ + emit_move_insn (operands[3], operands[1]); + operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); +}) + +(define_expand "floatunssi2" + [(parallel + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_dup 2)) + (clobber (match_scratch:SI 3 ""))])] + "!TARGET_64BIT + && ((TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) + && TARGET_SSE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + { + ix86_expand_convert_uns_si_sse (operands[0], operands[1]); + DONE; + } + else + { + enum ix86_stack_slot slot = (virtuals_instantiated + ? 
SLOT_TEMP + : SLOT_VIRTUAL); + operands[2] = assign_386_stack_local (DImode, slot); + } +}) + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:DI 1 "nonimmediate_operand" ""))] + "TARGET_64BIT && TARGET_SSE_MATH" + "x86_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DI 1 "nonimmediate_operand" ""))] + "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + && TARGET_SSE2 && TARGET_SSE_MATH" +{ + if (TARGET_64BIT) + x86_emit_floatuns (operands); + else + ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); + DONE; +}) + +;; Add instructions + +(define_expand "add3" + [(set (match_operand:SDWIM 0 "nonimmediate_operand" "") + (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "") + (match_operand:SDWIM 2 "" "")))] + "" + "ix86_expand_binary_operator (PLUS, mode, operands); DONE;") + +(define_insn_and_split "*add3_doubleword" + [(set (match_operand: 0 "nonimmediate_operand" "=r,o") + (plus: + (match_operand: 1 "nonimmediate_operand" "%0,0") + (match_operand: 2 "" "ro,r"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:DWIH + (match_dup 4) + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);") + +(define_insn "*add3_cc" + [(set (reg:CC FLAGS_REG) + (unspec:CC + [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0") + (match_operand:SWI48 2 "" "r,rm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + (plus:SWI48 (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "add{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "addqi3_cc" + [(set (reg:CC FLAGS_REG) + (unspec:CC + [(match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qn,qm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "add{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*lea_1" + [(set (match_operand:P 0 "register_operand" "=r") + (match_operand:P 1 "no_seg_address_operand" "p"))] + "" + "lea{}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "")]) + +(define_insn "*lea_2" + [(set (match_operand:SI 0 "register_operand" "=r") + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))] + "TARGET_64BIT" + "lea{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))] + "TARGET_64BIT" + "lea{l}\t{%a1, %k0|%k0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*add_1" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r") + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r") + (match_operand:SWI48 2 "" ",r,0,l"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, mode, operands)" +{ + 
switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{<imodesuffix>}\t%0";
+      else
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return "dec{<imodesuffix>}\t%0";
+        }
+
+    default:
+      /* For most processors, ADD is faster than LEA.  This alternative
+         was added to use ADD as much as possible.  */
+      if (which_alternative == 2)
+        {
+          rtx tmp;
+          tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+        }
+
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "3")
+              (const_string "lea")
+            (match_operand:SWI48 2 "incdec_operand" "")
+              (const_string "incdec")
+           ]
+           (const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+        (const_string "1")
+        (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; It may seem that nonimmediate operand is proper one for operand 1.
+;; The addsi_1 pattern allows nonimmediate operand at that place and
+;; we take care in ix86_binary_operator_ok to not allow two memory
+;; operands so proper swapping will be done in reload.  This allows
+;; patterns constructed from addsi_1 to match.
+
+(define_insn "*addsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+        (zero_extend:DI
+          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
+                   (match_operand:SI 2 "general_operand" "g,0,li"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%k0";
+      else
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+        }
+
+    default:
+      /* For most processors, ADD is faster than LEA.  This alternative
+         was added to use ADD as much as possible.
*/ + if (which_alternative == 1) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + if (x86_maybe_negate_const_int (&operands[2], SImode)) + return "sub{l}\t{%2, %k0|%k0, %2}"; + + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI")]) + +(define_insn "*addhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rn,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], HImode)) + return "sub{w}\t{%2, %0|%0, %2}"; + + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,r") + (match_operand:HI 2 "general_operand" "rmn,rn,0,ln"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* For most processors, ADD is faster than LEA. This alternative + was added to use ADD as much as possible. */ + if (which_alternative == 2) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (x86_maybe_negate_const_int (&operands[2], HImode)) + return "sub{w}\t{%2, %0|%0, %2}"; + + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "3") + (const_string "lea") + (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "HI,HI,HI,SI")]) + +;; %%% Potential partial reg stall on alternative 2. What to do? 
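+;;
+;; For illustration, a partial register stall is the penalty taken when
+;; a write to a byte register is followed by a read of the containing
+;; full register, e.g. (typical sequence; registers are arbitrary):
+;;
+;;	addb	$1, %al		; writes AL only
+;;	movl	%eax, %edx	; reads all of EAX -> stall while merging
+;;
+;; The widening alternatives below sidestep this by doing a QImode add
+;; as a full 32-bit "addl" when only the low byte of the result is
+;; needed afterwards.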
+(define_insn "*addqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 2); + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], QImode)) + { + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; + } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "QI,QI,SI")]) + +;; %%% Potential partial reg stall on alternatives 3 and 4. What to do? +(define_insn "*addqi_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,q,r,r,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,r") + (match_operand:QI 2 "general_operand" "qmn,qn,0,rn,0,ln"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 3 || which_alternative == 4); + + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } + + default: + /* For most processors, ADD is faster than LEA. These alternatives + were added to use ADD as much as possible. */ + if (which_alternative == 2 || which_alternative == 4) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (x86_maybe_negate_const_int (&operands[2], QImode)) + { + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; + } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "5") + (const_string "lea") + (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "QI,QI,QI,SI,SI,SI")]) + +(define_insn "*addqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (plus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qnm"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[1] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[1] == constm1_rtx); + return "dec{b}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[1], QImode)) + return "sub{b}\t{%1, %0|%0, %1}"; + + return "add{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 1 "incdec_operand" "") + (const_string "incdec") + (const_string "alu1"))) + (set (attr "memory") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "load") + (const_string "none"))) + (set_attr "mode" "QI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand 0 "register_operand" "") + (plus (match_operand 1 "register_operand" "") + (match_operand 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && ix86_lea_for_add_ok (insn, operands)" + [(const_int 0)] +{ + rtx pat; + enum machine_mode mode = GET_MODE (operands[0]); + + /* In -fPIC mode the constructs like (const (unspec [symbol_ref])) + may confuse gen_lowpart. */ + if (mode != Pmode) + { + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + } + + pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]); + + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) + operands[0] = gen_lowpart (SImode, operands[0]); + + if (TARGET_64BIT && mode != Pmode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; Convert lea to the lea pattern to avoid flags dependency. +;; ??? This pattern handles immediate operands that do not satisfy immediate +;; operand predicate (LEGITIMATE_CONSTANT_P) in the previous pattern. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (plus:DI (match_dup 1) (match_dup 2)))]) + +;; Convert lea to the lea pattern to avoid flags dependency. 
+(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && ix86_lea_for_add_ok (insn, operands)" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (DImode, operands[1]); + operands[2] = gen_lowpart (DImode, operands[2]); +}) + +(define_insn "*add_2" + [(set (reg FLAGS_REG) + (compare + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0,0") + (match_operand:SWI 2 "" ",")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=,m") + (plus:SWI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, mode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], mode)) + return "sub{}\t{%2, %0|%0, %2}"; + + return "add{}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SWI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], SImode)) + return "sub{l}\t{%2, %k0|%k0, %2}"; + + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI")]) + +(define_insn "*add_3" + [(set (reg FLAGS_REG) + (compare + (neg:SWI (match_operand:SWI 2 "" "")) + (match_operand:SWI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:SWI 0 "="))] + "ix86_match_ccmode (insn, CCZmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], mode)) + return "sub{}\t{%2, %0|%0, %2}"; + + return "add{}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SWI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" 
"")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_3_zext" + [(set (reg FLAGS_REG) + (compare + (neg:SI (match_operand:SI 2 "general_operand" "g")) + (match_operand:SI 1 "nonimmediate_operand" "%0"))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode) + && ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], SImode)) + return "sub{l}\t{%2, %k0|%k0, %2}"; + + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI")]) + +; For comparisons against 1, -1 and 128, we may generate better code +; by converting cmp to add, inc or dec as done by peephole2. This pattern +; is matched then. We can't accept general immediate, because for +; case of overflows, the result is messed up. +; Also carry flag is reversed compared to cmp, so this conversion is valid +; only for comparisons not depending on it. + +(define_insn "*adddi_4" + [(set (reg FLAGS_REG) + (compare + (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:DI 2 "x86_64_immediate_operand" "e"))) + (clobber (match_scratch:DI 0 "=rm"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCGCmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{q}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], DImode)) + return "add{q}\t{%2, %0|%0, %2}"; + + return "sub{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "DI")]) + +; For comparisons against 1, -1 and 128, we may generate better code +; by converting cmp to add, inc or dec as done by peephole2. This pattern +; is matched then. We can't accept general immediate, because for +; case of overflows, the result is messed up. +; Also carry flag is reversed compared to cmp, so this conversion is valid +; only for comparisons not depending on it. 
+ +(define_insn "*add_4" + [(set (reg FLAGS_REG) + (compare + (match_operand:SWI124 1 "nonimmediate_operand" "0") + (match_operand:SWI124 2 "const_int_operand" "n"))) + (clobber (match_scratch:SWI124 0 "=m"))] + "ix86_match_ccmode (insn, CCGCmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx) + return "inc{}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], mode)) + return "add{}\t{%2, %0|%0, %2}"; + + return "sub{}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand: 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "")]) + +(define_insn "*add_5" + [(set (reg FLAGS_REG) + (compare + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "" "")) + (const_int 0))) + (clobber (match_scratch:SWI 0 "="))] + "ix86_match_ccmode (insn, CCGOCmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], mode)) + return "sub{}\t{%2, %0|%0, %2}"; + + return "add{}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SWI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "")]) + +(define_insn "*addqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "nonmemory_operand" "Qn"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{b}\t%h0"; + } + + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +(define_insn "addqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "Qmn"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{b}\t%h0"; + } + + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +(define_insn "*addqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 
8)
+                       (const_int 8))
+        (plus:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "%0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extract:SI
+            (match_operand 2 "ext_register_operand" "Q")
+            (const_int 8)
+            (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "add{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+;; The lea patterns for non-Pmodes need to be matched by
+;; several insns, which are then converted to a real lea by splitters.
+
+(define_insn_and_split "*lea_general_1"
+  [(set (match_operand 0 "register_operand" "=r")
+        (plus (plus (match_operand 1 "index_register_operand" "l")
+                    (match_operand 2 "register_operand" "r"))
+              (match_operand 3 "immediate_operand" "i")))]
+  "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+    || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && GET_MODE (operands[0]) == GET_MODE (operands[2])
+   && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+       || GET_MODE (operands[3]) == VOIDmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx pat;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_lowpart (Pmode, operands[2]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+  pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]),
+                      operands[3]);
+  if (Pmode != SImode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (plus:SI (plus:SI
+                     (match_operand:SI 1 "index_register_operand" "l")
+                     (match_operand:SI 2 "register_operand" "r"))
+                   (match_operand:SI 3 "immediate_operand" "i"))))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1)
+                                                     (match_dup 2))
+                                            (match_dup 3)) 0)))]
+{
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_lowpart (Pmode, operands[2]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2"
+  [(set (match_operand 0 "register_operand" "=r")
+        (plus (mult (match_operand 1 "index_register_operand" "l")
+                    (match_operand 2 "const248_operand" "i"))
+              (match_operand 3 "nonmemory_operand" "ri")))]
+  "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+    || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+       || GET_MODE (operands[3]) == VOIDmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx pat;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+  pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]),
+                      operands[3]);
+  if (Pmode != SImode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (plus:SI
(mult:SI + (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "nonmemory_operand" "ri"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_3" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (mult (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const248_operand" "i")) + (match_operand 3 "register_operand" "r")) + (match_operand 4 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[3])" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); + pat = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], + operands[2]), + operands[3]), + operands[4]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_3_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (mult:SI + (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "register_operand" "r")) + (match_operand:SI 4 "immediate_operand" "i"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (match_dup 4)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +;; Subtract instructions + +(define_expand "sub3" + [(set (match_operand:SDWIM 0 "nonimmediate_operand" "") + (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "") + (match_operand:SDWIM 2 "" "")))] + "" + "ix86_expand_binary_operator (MINUS, mode, operands); DONE;") + +(define_insn_and_split "*sub3_doubleword" + [(set (match_operand: 0 "nonimmediate_operand" "=r,o") + (minus: + (match_operand: 1 "nonimmediate_operand" "0,0") + (match_operand: 2 "" "ro,r"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (minus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:DWIH + (match_dup 4) + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);") + +(define_insn "*sub_1" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (minus:SWI + (match_operand:SWI 1 
"nonimmediate_operand" "0,0") + (match_operand:SWI 2 "" ",m"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, mode, operands)" + "sub{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*subsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (minus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "sub{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*sub_2" + [(set (reg FLAGS_REG) + (compare + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "" ",m")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (minus:SWI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, mode, operands)" + "sub{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*subsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*sub_3" + [(set (reg FLAGS_REG) + (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "" ",m"))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (minus:SWI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, mode, operands)" + "sub{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*subsi_3_zext" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "g"))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %1|%1, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; Add with carry and subtract with borrow + +(define_expand "3_carry" + [(parallel + [(set (match_operand:SWI 0 "nonimmediate_operand" "") + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "") + (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:SWI 2 "" "")))) + (clobber (reg:CC FLAGS_REG))])] + "ix86_binary_operator_ok (, mode, operands)") + +(define_insn "*3_carry" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (plus:SWI + (match_operator 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) 
(const_int 0)]) + (match_operand:SWI 2 "" ",m")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "")]) + +(define_insn "*addsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (plus:SI (match_operator 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 2 "general_operand" "g"))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (plus:SI (match_operator 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 2 "general_operand" "g"))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +;; Overflow setting add and subtract instructions + +(define_insn "*add3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "" "")) + (match_dup 1))) + (clobber (match_scratch:SWI 0 "="))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "add{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*sub3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (minus:SWI + (match_operand:SWI 0 "nonimmediate_operand" "m,") + (match_operand:SWI 1 "" ",m")) + (match_dup 0)))] + "" + "cmp{}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "")]) + +(define_insn "*3_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "" ",m")) + (match_dup 1))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (plusminus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (, mode, operands)" + "{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*si3_zext_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SI + (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "g")) + (match_dup 1))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" + "{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; The patterns that match these are at the end of this file. 
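+;;
+;; For illustration, a function such as
+;;
+;;	double f (double a, double b) { return a + b; }
+;;
+;; only needs the expanders below to emit the RTL; the insn patterns
+;; that finally match it live near the end of the file and typically
+;; produce "addsd %xmm1, %xmm0" with SSE2 math, or "faddp" with x87
+;; math.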
+ +(define_expand "xf3" + [(set (match_operand:XF 0 "register_operand" "") + (plusminus:XF + (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387") + +(define_expand "3" + [(set (match_operand:MODEF 0 "register_operand" "") + (plusminus:MODEF + (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "(TARGET_80387 && X87_ENABLE_ARITH (mode)) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)") + +;; Multiply instructions + +(define_expand "mul3" + [(parallel [(set (match_operand:SWIM248 0 "register_operand" "") + (mult:SWIM248 + (match_operand:SWIM248 1 "register_operand" "") + (match_operand:SWIM248 2 "" ""))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_expand "mulqi3" + [(parallel [(set (match_operand:QI 0 "register_operand" "") + (mult:QI + (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH") + +;; On AMDFAM10 +;; IMUL reg32/64, reg32/64, imm8 Direct +;; IMUL reg32/64, mem32/64, imm8 VectorPath +;; IMUL reg32/64, reg32/64, imm32 Direct +;; IMUL reg32/64, mem32/64, imm32 VectorPath +;; IMUL reg32/64, reg32/64 Direct +;; IMUL reg32/64, mem32/64 Direct +;; +;; On BDVER1, all above IMULs use DirectPath + +(define_insn "*mul3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r,r,r") + (mult:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:SWI48 2 "" "K,,mr"))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{}\t{%2, %1, %0|%0, %1, %2} + imul{}\t{%2, %1, %0|%0, %1, %2} + imul{}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "")]) + +(define_insn "*mulsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:SI 2 "general_operand" "K,i,mr")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "SI")]) + +;; On AMDFAM10 +;; IMUL reg16, reg16, imm8 VectorPath +;; IMUL reg16, mem16, imm8 VectorPath +;; IMUL reg16, reg16, imm16 VectorPath +;; IMUL reg16, mem16, imm16 VectorPath +;; IMUL reg16, reg16 Direct +;; IMUL reg16, mem16 Direct +;; +;; On BDVER1, all HI MULs use 
DoublePath + +(define_insn "*mulhi3_1" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:HI 2 "general_operand" "K,n,mr"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1,2") + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(eq_attr "alternative" "0,1") + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "double") + (set_attr "mode" "HI")]) + +;;On AMDFAM10 and BDVER1 +;; MUL reg8 Direct +;; MUL mem8 Direct + +(define_insn "*mulqi3_1" + [(set (match_operand:QI 0 "register_operand" "=a") + (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "QI")]) + +(define_expand "mul3" + [(parallel [(set (match_operand: 0 "register_operand" "") + (mult: + (any_extend: + (match_operand:DWIH 1 "nonimmediate_operand" "")) + (any_extend: + (match_operand:DWIH 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_expand "mulqihi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI + (any_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "")) + (any_extend:HI + (match_operand:QI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH") + +(define_insn "*mul3_1" + [(set (match_operand: 0 "register_operand" "=A") + (mult: + (any_extend: + (match_operand:DWIH 1 "nonimmediate_operand" "%0")) + (any_extend: + (match_operand:DWIH 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "")]) + +(define_insn "*mulqihi3_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (mult:HI + (any_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "%0")) + (any_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "QI")]) + +(define_expand "mul3_highpart" + [(parallel [(set (match_operand:SWI48 0 "register_operand" "") + (truncate:SWI48 + (lshiftrt: + (mult: + (any_extend: + (match_operand:SWI48 1 "nonimmediate_operand" "")) + 
(any_extend: + (match_operand:SWI48 2 "register_operand" ""))) + (match_dup 4)))) + (clobber (match_scratch:SWI48 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "operands[4] = GEN_INT (GET_MODE_BITSIZE (mode));") + +(define_insn "*muldi3_highpart_1" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI + (any_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "%a")) + (any_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "DI")]) + +(define_insn "*mulsi3_highpart_1" + [(set (match_operand:SI 0 "register_operand" "=d") + (truncate:SI + (lshiftrt:DI + (mult:DI + (any_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (any_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "SI")]) + +(define_insn "*mulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (any_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "SI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "mulxf3" + [(set (match_operand:XF 0 "register_operand" "") + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387") + +(define_expand "mul3" + [(set (match_operand:MODEF 0 "register_operand" "") + (mult:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "(TARGET_80387 && X87_ENABLE_ARITH (mode)) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)") + +;; Divide instructions + +;; The patterns that match these are at the end of this file. 
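+;;
+;; For illustration, the SFmode expander below also has a software
+;; path: with -mrecip and unsafe math optimizations, a/b is rewritten
+;; as a*rcpss(b) refined by a Newton-Raphson step (ix86_emit_swdivsf),
+;; trading a little precision for much lower latency than divss.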
+
+(define_expand "divxf3"
+  [(set (match_operand:XF 0 "register_operand" "")
+        (div:XF (match_operand:XF 1 "register_operand" "")
+                (match_operand:XF 2 "register_operand" "")))]
+  "TARGET_80387")
+
+(define_expand "divdf3"
+  [(set (match_operand:DF 0 "register_operand" "")
+        (div:DF (match_operand:DF 1 "register_operand" "")
+                (match_operand:DF 2 "nonimmediate_operand" "")))]
+   "(TARGET_80387 && X87_ENABLE_ARITH (DFmode))
+    || (TARGET_SSE2 && TARGET_SSE_MATH)")
+
+(define_expand "divsf3"
+  [(set (match_operand:SF 0 "register_operand" "")
+        (div:SF (match_operand:SF 1 "register_operand" "")
+                (match_operand:SF 2 "nonimmediate_operand" "")))]
+  "(TARGET_80387 && X87_ENABLE_ARITH (SFmode))
+    || TARGET_SSE_MATH"
+{
+  if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swdivsf (operands[0], operands[1],
+                         operands[2], SFmode);
+      DONE;
+    }
+})
+
+;; Divmod instructions.
+
+(define_expand "divmod<mode>4"
+  [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+                   (div:SWIM248
+                     (match_operand:SWIM248 1 "register_operand" "")
+                     (match_operand:SWIM248 2 "nonimmediate_operand" "")))
+              (set (match_operand:SWIM248 3 "register_operand" "")
+                   (mod:SWIM248 (match_dup 1) (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Split with 8bit unsigned divide:
+;;	if (dividend and divisor are in [0-255])
+;;	   use 8bit unsigned integer divide
+;;	else
+;;	   use original integer divide
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+        (div:SWI48 (match_operand:SWI48 2 "register_operand" "")
+                   (match_operand:SWI48 3 "nonimmediate_operand" "")))
+   (set (match_operand:SWI48 1 "register_operand" "")
+        (mod:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
+
+(define_insn_and_split "divmod<mode>4_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=a")
+        (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+                   (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWI48 1 "register_operand" "=&d")
+        (mod:SWI48 (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 1)
+                   (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
+              (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+                   (div:SWI48 (match_dup 2) (match_dup 3)))
+              (set (match_dup 1)
+                   (mod:SWI48 (match_dup 2) (match_dup 3)))
+              (use (match_dup 1))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.
*/
+      emit_move_insn (operands[1], operands[2]);
+      operands[4] = operands[1];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*divmod<mode>4"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+        (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+                     (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWIM248 1 "register_operand" "=&d")
+        (mod:SWIM248 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 1)
+                   (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
+              (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+                   (div:SWIM248 (match_dup 2) (match_dup 3)))
+              (set (match_dup 1)
+                   (mod:SWIM248 (match_dup 2) (match_dup 3)))
+              (use (match_dup 1))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+  if (<MODE>mode != HImode
+      && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[1], operands[2]);
+      operands[4] = operands[1];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*divmod<mode>4_noext"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+        (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+                     (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWIM248 1 "register_operand" "=d")
+        (mod:SWIM248 (match_dup 2) (match_dup 3)))
+   (use (match_operand:SWIM248 4 "register_operand" "1"))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "idiv{<imodesuffix>}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+                   (div:QI
+                     (match_operand:QI 1 "register_operand" "")
+                     (match_operand:QI 2 "nonimmediate_operand" "")))
+              (set (match_operand:QI 3 "register_operand" "")
+                   (mod:QI (match_dup 1) (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, mod, insn;
+  rtx tmp0, tmp1;
+
+  tmp0 = gen_reg_rtx (HImode);
+  tmp1 = gen_reg_rtx (HImode);
+
+  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is
+     in AX.  */
+  emit_insn (gen_extendqihi2 (tmp1, operands[1]));
+  emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
+
+  /* Extract remainder from AH.  */
+  tmp1 = gen_rtx_SIGN_EXTRACT (QImode, tmp0, GEN_INT (8), GEN_INT (8));
+  insn = emit_move_insn (operands[3], tmp1);
+
+  mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, mod);
+
+  /* Extract quotient from AL.  */
+  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
+
+  div = gen_rtx_DIV (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, div);
+
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+;; Change div/mod to HImode and extend the second argument to HImode
+;; so that the mode of div/mod matches the mode of the arguments.
+;; Otherwise combine may fail.
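+;;
+;; For illustration, dividing two values known to fit in a byte:
+;;
+;;	cbtw			; sign-extend AL into AX
+;;	idivb	%bl		; AL <- quotient, AH <- remainder
+;;
+;; (registers are illustrative) is considerably shorter than the full
+;; cltd/idivl sequence used for SImode division.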
+(define_insn "divmodhiqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+        (ior:HI
+          (ashift:HI
+            (zero_extend:HI
+              (truncate:QI
+                (mod:HI (match_operand:HI 1 "register_operand" "0")
+                        (sign_extend:HI
+                          (match_operand:QI 2 "nonimmediate_operand" "qm")))))
+            (const_int 8))
+          (zero_extend:HI
+            (truncate:QI
+              (div:HI (match_dup 1) (sign_extend:HI (match_dup 2)))))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "idiv{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+(define_expand "udivmod<mode>4"
+  [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+                   (udiv:SWIM248
+                     (match_operand:SWIM248 1 "register_operand" "")
+                     (match_operand:SWIM248 2 "nonimmediate_operand" "")))
+              (set (match_operand:SWIM248 3 "register_operand" "")
+                   (umod:SWIM248 (match_dup 1) (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Split with 8bit unsigned divide:
+;;	if (dividend and divisor are in [0-255])
+;;	   use 8bit unsigned integer divide
+;;	else
+;;	   use original integer divide
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+        (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "")
+                    (match_operand:SWI48 3 "nonimmediate_operand" "")))
+   (set (match_operand:SWI48 1 "register_operand" "")
+        (umod:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
+
+(define_insn_and_split "udivmod<mode>4_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=a")
+        (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+                    (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWI48 1 "register_operand" "=&d")
+        (umod:SWI48 (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(set (match_dup 1) (const_int 0))
+   (parallel [(set (match_dup 0)
+                   (udiv:SWI48 (match_dup 2) (match_dup 3)))
+              (set (match_dup 1)
+                   (umod:SWI48 (match_dup 2) (match_dup 3)))
+              (use (match_dup 1))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*udivmod<mode>4"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+        (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+                      (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWIM248 1 "register_operand" "=&d")
+        (umod:SWIM248 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(set (match_dup 1) (const_int 0))
+   (parallel [(set (match_dup 0)
+                   (udiv:SWIM248 (match_dup 2) (match_dup 3)))
+              (set (match_dup 1)
+                   (umod:SWIM248 (match_dup 2) (match_dup 3)))
+              (use (match_dup 1))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*udivmod<mode>4_noext"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+        (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+                      (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWIM248 1 "register_operand" "=d")
+        (umod:SWIM248 (match_dup 2) (match_dup 3)))
+   (use (match_operand:SWIM248 4 "register_operand" "1"))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "div{<imodesuffix>}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "udivmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+                   (udiv:QI
+                     (match_operand:QI 1 "register_operand" "")
+
(match_operand:QI 2 "nonimmediate_operand" ""))) + (set (match_operand:QI 3 "register_operand" "") + (umod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" +{ + rtx div, mod, insn; + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (HImode); + tmp1 = gen_reg_rtx (HImode); + + /* Extend operands[1] to HImode. Generate 8bit divide. Result is + in AX. */ + emit_insn (gen_zero_extendqihi2 (tmp1, operands[1])); + emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2])); + + /* Extract remainder from AH. */ + tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8)); + tmp1 = simplify_gen_subreg (QImode, tmp1, SImode, 0); + insn = emit_move_insn (operands[3], tmp1); + + mod = gen_rtx_UMOD (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, mod); + + /* Extract quotient from AL. */ + insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0)); + + div = gen_rtx_UDIV (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, div); + + DONE; +}) + +(define_insn "udivmodhiqi3" + [(set (match_operand:HI 0 "register_operand" "=a") + (ior:HI + (ashift:HI + (zero_extend:HI + (truncate:QI + (mod:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "qm"))))) + (const_int 8)) + (zero_extend:HI + (truncate:QI + (div:HI (match_dup 1) (zero_extend:HI (match_dup 2))))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "div{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + +;; We cannot use div/idiv for double division, because it causes +;; "division by zero" on the overflow and that's not what we expect +;; from truncate. Because true (non truncating) double division is +;; never generated, we can't create this insn anyway. +; +;(define_insn "" +; [(set (match_operand:SI 0 "register_operand" "=a") +; (truncate:SI +; (udiv:DI (match_operand:DI 1 "register_operand" "A") +; (zero_extend:DI +; (match_operand:SI 2 "nonimmediate_operand" "rm"))))) +; (set (match_operand:SI 3 "register_operand" "=d") +; (truncate:SI +; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2))))) +; (clobber (reg:CC FLAGS_REG))] +; "" +; "div{l}\t{%2, %0|%0, %2}" +; [(set_attr "type" "idiv")]) + +;;- Logical AND instructions + +;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al. +;; Note that this excludes ah. 
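+;;
+;; For illustration, these patterns let a masked test such as
+;;
+;;	int f (int x) { return (x & 0x80) != 0; }
+;;
+;; set the flags with a single instruction, typically
+;;
+;;	testb	$0x80, %al	; assuming x's low byte is in AL
+;;	setne	%al
+;;
+;; instead of an "and" that destroys a register plus a separate compare.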
+ +(define_expand "testsi_ccno_1" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "nonmemory_operand" "")) + (const_int 0)))]) + +(define_expand "testqi_ccz_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "nonmemory_operand" "")) + (const_int 0)))]) + +(define_expand "testdi_ccno_1" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:DI (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "x86_64_szext_general_operand" "")) + (const_int 0)))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))") + +(define_insn "*testdi_1" + [(set (reg FLAGS_REG) + (compare + (and:DI + (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm") + (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + test{l}\t{%k1, %k0|%k0, %k1} + test{l}\t{%k1, %k0|%k0, %k1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,0,1,1") + (set_attr "mode" "SI,SI,DI,DI,DI")]) + +(define_insn "*testqi_1_maybe_si" + [(set (reg FLAGS_REG) + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r") + (match_operand:QI 1 "general_operand" "n,n,qn,n")) + (const_int 0)))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, + CONST_INT_P (operands[1]) + && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)" +{ + if (which_alternative == 3) + { + if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0) + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + return "test{l}\t{%1, %k0|%k0, %1}"; + } + return "test{b}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1,1") + (set_attr "mode" "QI,QI,QI,SI") + (set_attr "pent_pair" "uv,np,uv,np")]) + +(define_insn "*test_1" + [(set (reg FLAGS_REG) + (compare + (and:SWI124 + (match_operand:SWI124 0 "nonimmediate_operand" "%!*a,,m") + (match_operand:SWI124 1 "general_operand" ",,")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "test{}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testqi_ext_ccno_0" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "")) + (const_int 0)))]) + +(define_insn "*testqi_ext_0" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI") + (set_attr "length_immediate" "1") + (set_attr "modrm" "1") + (set_attr "pent_pair" "np")]) + +(define_insn "*testqi_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "register_operand" "Q"))) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr 
"mode" "QI")]) + +(define_insn "*testqi_ext_1" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "general_operand" "Qm"))) + (const_int 0)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8))) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_3_rex64" + [(set (reg FLAGS_REG) + (compare (zero_extract:DI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:DI 1 "const_int_operand" "") + (match_operand:DI 2 "const_int_operand" "")) + (const_int 0)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + /* Ensure that resulting mask is zero or sign extended operand. */ + && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64 + && INTVAL (operands[1]) > 32)) + && (GET_MODE (operands[0]) == SImode + || GET_MODE (operands[0]) == DImode + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") + +;; Combine likes to form bit extractions for some tests. Humor it. +(define_insn "*testqi_ext_3" + [(set (reg FLAGS_REG) + (compare (zero_extract:SI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + && (GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode) + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(zero_extract + (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)]))] + "ix86_match_ccmode (insn, CCNOmode)" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))] +{ + rtx val = operands[2]; + HOST_WIDE_INT len = INTVAL (operands[3]); + HOST_WIDE_INT pos = INTVAL (operands[4]); + HOST_WIDE_INT mask; + enum machine_mode mode, submode; + + mode = GET_MODE (val); + if (MEM_P (val)) + { + /* ??? Combine likes to put non-volatile mem extractions in QImode + no matter the size of the test. So find a mode that works. */ + if (! MEM_VOLATILE_P (val)) + { + mode = smallest_mode_for_size (pos + len, MODE_INT); + val = adjust_address (val, mode, 0); + } + } + else if (GET_CODE (val) == SUBREG + && (submode = GET_MODE (SUBREG_REG (val)), + GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)) + && pos + len <= GET_MODE_BITSIZE (submode) + && GET_MODE_CLASS (submode) == MODE_INT) + { + /* Narrow a paradoxical subreg to prevent partial register stalls. 
*/
+      mode = submode;
+      val = SUBREG_REG (val);
+    }
+  else if (mode == HImode && pos + len <= 8)
+    {
+      /* Small HImode tests can be converted to QImode.  */
+      mode = QImode;
+      val = gen_lowpart (QImode, val);
+    }
+
+  if (len == HOST_BITS_PER_WIDE_INT)
+    mask = -1;
+  else
+    mask = ((HOST_WIDE_INT)1 << len) - 1;
+  mask <<= pos;
+
+  operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode));
+})
+
+;; Convert HImode/SImode test instructions with an immediate operand to
+;; QImode ones.  The i386 cannot encode a test instruction with an 8-bit
+;; sign-extended immediate, so this is a relatively important trick.
+;; Do the conversion only post-reload, to avoid limiting the register
+;; class to QI regs.
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+        (match_operator 1 "compare_operator"
+          [(and (match_operand 2 "register_operand" "")
+                (match_operand 3 "const_int_operand" ""))
+           (const_int 0)]))]
+  "reload_completed
+   && QI_REG_P (operands[2])
+   && GET_MODE (operands[2]) != QImode
+   && ((ix86_match_ccmode (insn, CCZmode)
+        && !(INTVAL (operands[3]) & ~(255 << 8)))
+       || (ix86_match_ccmode (insn, CCNOmode)
+           && !(INTVAL (operands[3]) & ~(127 << 8))))"
+  [(set (match_dup 0)
+        (match_op_dup 1
+          [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8))
+                   (match_dup 3))
+           (const_int 0)]))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);
+   operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);")
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+        (match_operator 1 "compare_operator"
+          [(and (match_operand 2 "nonimmediate_operand" "")
+                (match_operand 3 "const_int_operand" ""))
+           (const_int 0)]))]
+  "reload_completed
+   && GET_MODE (operands[2]) != QImode
+   && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
+   && ((ix86_match_ccmode (insn, CCZmode)
+        && !(INTVAL (operands[3]) & ~255))
+       || (ix86_match_ccmode (insn, CCNOmode)
+           && !(INTVAL (operands[3]) & ~127)))"
+  [(set (match_dup 0)
+        (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+                         (const_int 0)]))]
+  "operands[2] = gen_lowpart (QImode, operands[2]);
+   operands[3] = gen_lowpart (QImode, operands[3]);")
+
+;; %%% This used to optimize known byte-wide and operations to memory,
+;; and sometimes to QImode registers.  If this is considered useful,
+;; it should be done with splitters.
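+
+;; Illustrative aside (not from the upstream sources): the two splits
+;; above rest on simple mask identities.  For a value in a QI-addressable
+;; register, a wide test whose immediate occupies only bits 8..15 can be
+;; performed on the high byte, and one whose immediate fits in bits 0..7
+;; on the low byte.  In C terms:
+;;
+;;   /* (x & 0x4500) == 0  is the same as  ((x >> 8) & 0x45) == 0,
+;;      i.e. a byte test on %ah.  Assumes (imm & ~0xff00u) == 0.  */
+;;   static int high_byte_test (unsigned int x, unsigned int imm)
+;;   {
+;;     return ((x >> 8) & (imm >> 8)) == 0;
+;;   }
+;;
+;;   /* (x & 0x45) == 0 already involves only the low byte, i.e. a
+;;      byte test on %al.  Assumes (imm & ~0xffu) == 0.  */
+;;   static int low_byte_test (unsigned int x, unsigned int imm)
+;;   {
+;;     return ((x & 0xff) & imm) == 0;
+;;   }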
+ +(define_expand "and3" + [(set (match_operand:SWIM 0 "nonimmediate_operand" "") + (and:SWIM (match_operand:SWIM 1 "nonimmediate_operand" "") + (match_operand:SWIM 2 "" "")))] + "" + "ix86_expand_binary_operator (AND, mode, operands); DONE;") + +(define_insn "*anddi_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:DI + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + { + enum machine_mode mode; + + gcc_assert (CONST_INT_P (operands[2])); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + else + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (get_attr_mode (insn) == MODE_SI) + return "and{l}\t{%k2, %k0|%k0, %k2}"; + else + return "and{q}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,alu,imovx") + (set_attr "length_immediate" "*,*,*,0") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "type" "imovx") + (and (ne (symbol_ref "INTVAL (operands[2]) == 0xff") (const_int 0)) + (match_operand 1 "ext_QIreg_nomode_operand" ""))) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI,DI,DI,SI")]) + +(define_insn "*andsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:SI 2 "general_operand" "ri,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + { + enum machine_mode mode; + + gcc_assert (CONST_INT_P (operands[2])); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bl|x}\t{%1, %0|%0, %1}"; + else + return "movz{wl|x}\t{%1, %0|%0, %1}"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "and{l}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "type" "imovx") + (and (ne (symbol_ref "INTVAL (operands[2]) == 0xff") (const_int 0)) + (match_operand 1 "ext_QIreg_nomode_operand" ""))) + (const_string "1") + (const_string "*"))) + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*andhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:HI 2 "general_operand" "rn,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + 
gcc_assert (CONST_INT_P (operands[2])); + gcc_assert (INTVAL (operands[2]) == 0xff); + return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + + return "and{w}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set_attr "length_immediate" "*,*,0") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "type" "imovx") + (match_operand 1 "ext_QIreg_nomode_operand" "")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "HI,HI,SI")]) + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*andqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, QImode, operands)" + "@ + and{b}\t{%2, %0|%0, %2} + and{b}\t{%2, %0|%0, %2} + and{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*andqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_dup 0) + (const_int -65536))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL) + || optimize_function_for_size_p (cfun)" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (HImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -256))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (QImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -65281))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" + [(parallel [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]);") + +(define_insn "*anddi_2" + [(set (reg FLAGS_REG) + (compare + (and:DI + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm") + (and:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, DImode, operands)" + "@ + and{l}\t{%k2, %k0|%k0, %k2} + and{q}\t{%2, %0|%0, %2} + and{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI,DI,DI")]) + +(define_insn "*andqi_2_maybe_si" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmn,qn,n")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r") + (and:QI (match_dup 1) 
(match_dup 2)))]
+  "ix86_binary_operator_ok (AND, QImode, operands)
+   && ix86_match_ccmode (insn,
+                         CONST_INT_P (operands[2])
+                         && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
+{
+  if (which_alternative == 2)
+    {
+      if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+        operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
+      return "and{l}\t{%2, %k0|%k0, %2}";
+    }
+  return "and{b}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*and<mode>_2"
+  [(set (reg FLAGS_REG)
+        (compare (and:SWI124
+                  (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
+                  (match_operand:SWI124 2 "general_operand" "<g>,<r><i>"))
+                 (const_int 0)))
+   (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>,<r>m")
+        (and:SWI124 (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+  "and{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; See comment for addsi_1_zext why we do use nonimmediate_operand
+(define_insn "*andsi_2_zext"
+  [(set (reg FLAGS_REG)
+        (compare (and:SI
+                  (match_operand:SI 1 "nonimmediate_operand" "%0")
+                  (match_operand:SI 2 "general_operand" "g"))
+                 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, SImode, operands)"
+  "and{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*andqi_2_slp"
+  [(set (reg FLAGS_REG)
+        (compare (and:QI
+                   (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+                   (match_operand:QI 1 "nonimmediate_operand" "qmn,qn"))
+                 (const_int 0)))
+   (set (strict_low_part (match_dup 0))
+        (and:QI (match_dup 0) (match_dup 1)))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "and{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+;; ??? A bug in recog prevents it from recognizing a const_int as an
+;; operand to zero_extend in andqi_ext_1.  It was checking explicitly
+;; for a QImode operand, which of course failed.
+(define_insn "andqi_ext_0"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (and:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (match_operand 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "and{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "modrm" "1")
+   (set_attr "mode" "QI")])
+
+;; Generated by peephole translating test to and.  This shows up
+;; often in fp comparisons.
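+;; Illustrative background (not upstream text): x87 compares typically
+;; end with "fnstsw %ax", leaving the FPU status word in %ax with the
+;; C0/C2/C3 condition bits at bits 8, 10 and 14, i.e. in %ah, so testing
+;; them is a high-byte operation.  In C terms:
+;;
+;;   /* sw as stored by fnstsw; 0x45 selects C0/C2/C3 within %ah.  */
+;;   static int fp_cond_bits (unsigned short sw)
+;;   {
+;;     return (sw >> 8) & 0x45;
+;;   }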
+(define_insn "*andqi_ext_0_cc" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 1) + (const_int 8) + (const_int 8)) + (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode)" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "" + "and{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +;; Convert wide AND instructions with immediate operand to shorter QImode +;; equivalents when possible. +;; Don't do the splitting with memory operands, since it introduces risk +;; of memory mismatch stalls. We may want to do the splitting for optimizing +;; for size, but that can (should?) be handled by generic code instead. +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(~INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (and:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since AND can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is not set. 
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+        (and (match_operand 1 "general_operand" "")
+             (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && ANY_QI_REG_P (operands[0])
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(~INTVAL (operands[2]) & ~255)
+   && !(INTVAL (operands[2]) & 128)
+   && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (strict_low_part (match_dup 0))
+                   (and:QI (match_dup 1)
+                           (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (QImode, operands[0]);
+   operands[1] = gen_lowpart (QImode, operands[1]);
+   operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Logical inclusive and exclusive OR instructions
+
+;; %%% This used to optimize known byte-wide and operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "<code><mode>3"
+  [(set (match_operand:SWIM 0 "nonimmediate_operand" "")
+        (any_or:SWIM (match_operand:SWIM 1 "nonimmediate_operand" "")
+                     (match_operand:SWIM 2 "<general_operand>" "")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*<code><mode>_1"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,rm")
+        (any_or:SWI248
+         (match_operand:SWI248 1 "nonimmediate_operand" "%0,0")
+         (match_operand:SWI248 2 "<general_operand>" "<g>,r<i>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; %%% Potential partial reg stall on alternative 2.  What to do?
+(define_insn "*<code>qi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+        (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+                   (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, QImode, operands)"
+  "@
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{l}\t{%k2, %k0|%k0, %k2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI,QI,SI")])
+
+;; See comment for addsi_1_zext why we do use nonimmediate_operand
+(define_insn "*<code>si_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+         (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+                    (match_operand:SI 2 "general_operand" "g"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<code>si_1_zext_imm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (any_or:DI
+         (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+         (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<code>qi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m"))
+        (any_or:QI (match_dup 0)
+                   (match_operand:QI 1 "general_operand" "qmn,qn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "<logic>{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*<code><mode>_2"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SWI
+                  (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+                  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))
+                 (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
+        (any_or:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; See comment for addsi_1_zext why we do use nonimmediate_operand
+;; ??? Special case for immediate operand is missing - it is tricky.
+(define_insn "*<code>si_2_zext"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+                            (match_operand:SI 2 "general_operand" "g"))
+                 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<code>si_2_zext_imm"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SI
+                  (match_operand:SI 1 "nonimmediate_operand" "%0")
+                  (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
+                 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<code>qi_2_slp"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+                            (match_operand:QI 1 "general_operand" "qmn,qn"))
+                 (const_int 0)))
+   (set (strict_low_part (match_dup 0))
+        (any_or:QI (match_dup 0) (match_dup 1)))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "<logic>{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*<code><mode>_3"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SWI
+                  (match_operand:SWI 1 "nonimmediate_operand" "%0")
+                  (match_operand:SWI 2 "<general_operand>" "<g>"))
+                 (const_int 0)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<code>qi_ext_0"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (any_or:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (match_operand 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "modrm" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*<code>qi_ext_1_rex64"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (any_or:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extend:SI
+            (match_operand 2 "ext_register_operand" "Q"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*<code>qi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (any_or:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extend:SI
+            (match_operand:QI 2 "general_operand" "Qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*<code>qi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (any_or:SI
+          (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+                           (const_int 8)
+                           (const_int 8))
+          (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+                           (const_int 8)
+                           (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+        (any_or (match_operand 1 "register_operand" "")
+                (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && QI_REG_P (operands[0])
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(INTVAL (operands[2]) & ~(255 << 8))
+   && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+                   (any_or:SI (zero_extract:SI (match_dup 1)
+                                               (const_int 8) (const_int 8))
+                              (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);
+   operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since OR can be encoded with sign extended immediate, this is only
+;; profitable when 7th bit is set.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+        (any_or (match_operand 1 "general_operand" "")
+                (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && ANY_QI_REG_P (operands[0])
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(INTVAL (operands[2]) & ~255)
+   && (INTVAL (operands[2]) & 128)
+   && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (strict_low_part (match_dup 0))
+                   (any_or:QI (match_dup 1)
+                              (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (QImode, operands[0]);
+   operands[1] = gen_lowpart (QImode, operands[1]);
+   operands[2] = gen_lowpart (QImode, operands[2]);")
+
+(define_expand "xorqi_cc_ext_1"
+  [(parallel [
+     (set (reg:CCNO FLAGS_REG)
+          (compare:CCNO
+            (xor:SI
+              (zero_extract:SI
+                (match_operand 1 "ext_register_operand" "")
+                (const_int 8)
+                (const_int 8))
+              (match_operand:QI 2 "general_operand" ""))
+            (const_int 0)))
+     (set (zero_extract:SI (match_operand 0 "ext_register_operand" "")
+                           (const_int 8)
+                           (const_int 8))
+          (xor:SI
+            (zero_extract:SI
+              (match_dup 1)
+              (const_int 8)
+              (const_int 8))
+            (match_dup 2)))])])
+
+(define_insn "*xorqi_cc_ext_1_rex64"
+  [(set (reg FLAGS_REG)
+        (compare
+          (xor:SI
+            (zero_extract:SI
+              (match_operand 1 "ext_register_operand" "0")
+              (const_int 8)
+              (const_int 8))
+            (match_operand:QI 2 "nonmemory_operand" "Qn"))
+          (const_int 0)))
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (xor:SI
+          (zero_extract:SI
+            (match_dup 1)
+            (const_int 8)
+            (const_int 8))
+          (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+  "xor{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "modrm" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1"
"*xorqi_cc_ext_1" + [(set (reg FLAGS_REG) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "qmn")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_dup 1) + (const_int 8) + (const_int 8)) + (match_dup 2)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +;; Negation instructions + +(define_expand "neg2" + [(set (match_operand:SDWIM 0 "nonimmediate_operand" "") + (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "")))] + "" + "ix86_expand_unary_operator (NEG, mode, operands); DONE;") + +(define_insn_and_split "*neg2_doubleword" + [(set (match_operand: 0 "nonimmediate_operand" "=ro") + (neg: (match_operand: 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, mode, operands)" + "#" + "reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:DWIH (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:DWIH (match_dup 1)))]) + (parallel + [(set (match_dup 2) + (plus:DWIH (match_dup 3) + (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (const_int 0)))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 2) + (neg:DWIH (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[2]);") + +(define_insn "*neg2_1" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, mode, operands)" + "neg{}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "")]) + +;; Combine is quite creative about this pattern. +(define_insn "*negsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI + (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; The problem with neg is that it does not perform (compare x 0), +;; it really performs (compare 0 x), which leaves us with the zero +;; flag being the only useful item. + +(define_insn "*neg2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (neg:SWI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, mode, operands)" + "neg{}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "")]) + +(define_insn "*negsi2_cmpz_zext" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (lshiftrt:DI + (neg:DI (ashift:DI + (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1) + (const_int 32))) + (const_int 32)))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; Changing of sign for FP values is doable using integer unit too. 
+
+(define_expand "<code><mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+        (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*absneg<mode>2_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
+        (match_operator:MODEF 3 "absneg_operator"
+          [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
+   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (<MODE>mode)"
+  "#")
+
+(define_insn "*absneg<mode>2_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r")
+        (match_operator:MODEF 3 "absneg_operator"
+          [(match_operand:MODEF 1 "register_operand" "0 ,x,0")]))
+   (use (match_operand:<ssevecmode> 2 "register_operand" "xm,0,X"))
+   (clobber (reg:CC FLAGS_REG))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "#")
+
+(define_insn "*absneg<mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
+        (match_operator:X87MODEF 3 "absneg_operator"
+          [(match_operand:X87MODEF 1 "register_operand" "0,0")]))
+   (use (match_operand 2 "" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "#")
+
+(define_expand "<code>tf2"
+  [(set (match_operand:TF 0 "register_operand" "")
+        (absneg:TF (match_operand:TF 1 "register_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
+
+(define_insn "*absnegtf2_sse"
+  [(set (match_operand:TF 0 "register_operand" "=x,x")
+        (match_operator:TF 3 "absneg_operator"
+          [(match_operand:TF 1 "register_operand" "0,x")]))
+   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_SSE2"
+  "#")
+
+;; Splitters for fp abs and neg.
+ +(define_split + [(set (match_operand 0 "fp_register_operand" "") + (match_operator 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "absneg_operator" + [(match_operand 1 "register_operand" "")])) + (use (match_operand 2 "nonimmediate_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) (match_dup 3))] +{ + enum machine_mode mode = GET_MODE (operands[0]); + enum machine_mode vmode = GET_MODE (operands[2]); + rtx tmp; + + operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0); + operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0); + if (operands_match_p (operands[0], operands[2])) + { + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } + if (GET_CODE (operands[3]) == ABS) + tmp = gen_rtx_AND (vmode, operands[1], operands[2]); + else + tmp = gen_rtx_XOR (vmode, operands[1], operands[2]); + operands[3] = tmp; +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operator:SF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand:V4SF 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + operands[0] = gen_lowpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; +}) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + if (TARGET_64BIT) + { + tmp = gen_lowpart (DImode, operands[0]); + tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63)); + operands[0] = tmp; + + if (GET_CODE (operands[1]) == ABS) + tmp = const0_rtx; + else + tmp = gen_rtx_NOT (DImode, tmp); + } + else + { + operands[0] = gen_highpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + } + operands[1] = tmp; +}) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (match_operator:XF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + + (TARGET_64BIT ? 1 : 2)); + if (GET_CODE (operands[1]) == ABS) + { + tmp = GEN_INT (0x7fff); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = GEN_INT (0x8000); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; +}) + +;; Conditionalize these after reload. If they match before reload, we +;; lose the clobber and ability to use integer instructions. 
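+;; (The x87 fabs/fchs patterns below carry no FLAGS_REG clobber, while
+;; the integer AND/XOR rewrites produced by the splitters above do;
+;; matching the x87 forms before reload would therefore rule out the
+;; integer implementation.  Hence the reload_completed conditions.)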
+ +(define_insn "*2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))] + "TARGET_80387 + && (reload_completed + || !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "")]) + +(define_insn "*extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (absneg:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) + +(define_insn "*extendsfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (absneg:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +(define_insn "*extenddfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (absneg:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "TARGET_80387" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +;; Copysign instructions + +(define_mode_iterator CSGNMODE [SF DF TF]) +(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")]) + +(define_expand "copysign3" + [(match_operand:CSGNMODE 0 "register_operand" "") + (match_operand:CSGNMODE 1 "nonmemory_operand" "") + (match_operand:CSGNMODE 2 "register_operand" "")] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" + "ix86_expand_copysign (operands); DONE;") + +(define_insn_and_split "copysign3_const" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x") + (unspec:CSGNMODE + [(match_operand: 1 "vector_move_operand" "xmC") + (match_operand:CSGNMODE 2 "register_operand" "0") + (match_operand: 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_copysign_const (operands); DONE;") + +(define_insn "copysign3_var" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x") + (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x") + (match_operand: 4 "nonimmediate_operand" "X,xm,xm,0,0") + (match_operand: 5 "nonimmediate_operand" "0,xm,1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 "=x,x,x,x,x"))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" + "#") + +(define_split + [(set (match_operand:CSGNMODE 0 "register_operand" "") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "") + (match_operand:CSGNMODE 3 "register_operand" "") + (match_operand: 4 "" "") + (match_operand: 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 ""))] + "((SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))) + && reload_completed" + [(const_int 0)] + "ix86_split_copysign_var (operands); DONE;") + +;; One complement instructions + +(define_expand "one_cmpl2" + [(set (match_operand:SWIM 0 "nonimmediate_operand" "") + (not:SWIM (match_operand:SWIM 1 "nonimmediate_operand" "")))] + "" + "ix86_expand_unary_operator (NOT, mode, operands); DONE;") + +(define_insn "*one_cmpl2_1" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") + (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))] + "ix86_unary_operator_ok (NOT, mode, operands)" + "not{}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "")]) + +;; %%% 
Potential partial reg stall on alternative 1. What to do? +(define_insn "*one_cmplqi2_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))] + "ix86_unary_operator_ok (NOT, QImode, operands)" + "@ + not{b}\t%0 + not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI,SI")]) + +;; ??? Currently never generated - xor is used instead. +(define_insn "*one_cmplsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (not:SI (match_operand:SI 1 "register_operand" "0"))))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)" + "not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_insn "*one_cmpl2_2" + [(set (reg FLAGS_REG) + (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (not:SWI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, mode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SWI (match_operand:SWI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:SWI 1 "nonimmediate_operand" "") + (not:SWI (match_dup 3)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:SWI (match_dup 3) (const_int -1)))])]) + +;; ??? Currently never generated - xor is used instead. +(define_insn "*one_cmplsi2_2_zext" + [(set (reg FLAGS_REG) + (compare (not:SI (match_operand:SI 1 "register_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (not:SI (match_dup 1))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, SImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "register_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "register_operand" "") + (zero_extend:DI (not:SI (match_dup 3))))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]) + +;; Shift instructions + +;; DImode shifts are implemented using the i386 "shift double" opcode, +;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count +;; is variable, then the count is in %cl and the "imm" operand is dropped +;; from the assembler input. +;; +;; This instruction shifts the target reg/mem as usual, but instead of +;; shifting in zeros, bits are shifted in from reg operand. If the insn +;; is a left shift double, bits are taken from the high order bits of +;; reg, else if the insn is a shift right double, bits are taken from the +;; low order bits of reg. So if %eax is "1234" and %edx is "5678", +;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345". +;; +;; Since sh[lr]d does not change the `reg' operand, that is done +;; separately, making all shifts emit pairs of shift double and normal +;; shift. 
Since sh[lr]d does not shift more than 31 bits, and we wish to +;; support a 63 bit shift, each shift where the count is in a reg expands +;; to a pair of shifts, a branch, a shift by 32 and a label. +;; +;; If the shift count is a constant, we need never emit more than one +;; shift pair, instead using moves and sign extension for counts greater +;; than 31. + +(define_expand "ashl3" + [(set (match_operand:SDWIM 0 "" "") + (ashift:SDWIM (match_operand:SDWIM 1 "" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFT, mode, operands); DONE;") + +(define_insn "*ashl3_doubleword" + [(set (match_operand:DWI 0 "register_operand" "=&r,r") + (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "n,0") + (match_operand:QI 2 "nonmemory_operand" "c,c"))) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:DWI 0 "register_operand" "") + (ashift:DWI (match_operand:DWI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(optimize && flag_peephole2) ? epilogue_completed : reload_completed" + [(const_int 0)] + "ix86_split_ashl (operands, NULL_RTX, mode); DONE;") + +;; By default we don't ask for a scratch register, because when DWImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. + +(define_peephole2 + [(match_scratch:DWIH 3 "r") + (parallel [(set (match_operand: 0 "register_operand" "") + (ashift: + (match_operand: 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "TARGET_CMOVE" + [(const_int 0)] + "ix86_split_ashl (operands, operands[3], mode); DONE;") + +(define_insn "x86_64_shld" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") + (ior:DI (ashift:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Jc")) + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "shld{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) + +(define_insn "x86_shld" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") + (ior:SI (ashift:SI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Ic")) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "" + "shld{l}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) + +(define_expand "x86_shift_adj_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (match_dup 4)) + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "") + (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:SWI48 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:SWI48 3 "register_operand" "") + (match_dup 1)))] + "TARGET_CMOVE" + "operands[4] = GEN_INT (GET_MODE_BITSIZE (mode));") + +(define_expand "x86_shift_adj_2" + [(use (match_operand:SWI48 0 
"register_operand" "")) + (use (match_operand:SWI48 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], + GEN_INT (GET_MODE_BITSIZE (mode)))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + ix86_expand_clear (operands[1]); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +;; Avoid useless masking of count operand. +(define_insn "*ashl3_mask" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") + (ashift:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0") + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "c") + (match_operand:SI 3 "const_int_operand" "n")) 0))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFT, mode, operands) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) + == GET_MODE_BITSIZE (mode)-1" +{ + return "sal{}\t{%b2, %0|%0, %b2}"; +} + [(set_attr "type" "ishift") + (set_attr "mode" "")]) + +(define_insn "*ashl3_1" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "c,M"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFT, mode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "add{}\t%0, %0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{}\t%0"; + else + return "sal{}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "")]) + +(define_insn "*ashlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (ashift:SI (match_operand:SI 1 "register_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%k0, %k0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{l}\t%k0"; + else + return "sal{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior 
(eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "cI"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "HI")]) + +(define_insn "*ashlhi3_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "HI,SI")]) + +(define_insn "*ashlqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "cI,cI"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t%k0, %k0"; + else + return "add{b}\t%0, %0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%k0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, 
%k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI,SI")]) + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*ashlqi3_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t%k0, %k0"; + else + return "add{b}\t%0, %0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%k0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, %k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI,SI,SI")]) + +(define_insn "*ashlqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (ashift:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "cI"))) + (clobber (reg:CC FLAGS_REG))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[1] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[1] == const1_rtx); + return "add{b}\t%0, %0"; + + default: + if (operands[1] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{b}\t%0"; + else + return "sal{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 1 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift1"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift1") + (and (match_operand 1 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + +;; Convert lea to the lea 
pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(ashift (match_operand 1 "index_register_operand" "")
+		(match_operand:QI 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(const_int 0)]
+{
+  rtx pat;
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  if (mode != Pmode)
+    operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+
+  pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
+
+  if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+    operands[0] = gen_lowpart (SImode, operands[0]);
+
+  if (TARGET_64BIT && mode != Pmode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+})
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "index_register_operand" "")
+		     (match_operand:QI 2 "const_int_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0)
+	(zero_extend:DI (subreg:SI (mult:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+  operands[1] = gen_lowpart (DImode, operands[1]);
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);
+})
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashl<mode>3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+		      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+	  (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(ashift:SWI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{<imodesuffix>}\t%0";
+      else
+	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+			  (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+	    (and (eq_attr "type" "ishift")
+		 (and (match_operand 2 "const1_operand" "")
+		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+			  (const_int 0)))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*ashlsi3_cmp_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:SI (match_operand:SI 1 "register_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t%k0, %k0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{l}\t%k0";
+      else
+	return "sal{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		     (const_int 0))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+	    (and (eq_attr "type" "ishift")
+		 (and (match_operand 2 "const1_operand" "")
+		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+			  (const_int 0)))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashl<mode>3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
+		      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+	  (const_int 0)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{<imodesuffix>}\t%0";
+      else
+	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+			  (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+	    (and (eq_attr "type" "ishift")
+		 (and (match_operand 2 "const1_operand" "")
+		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+			  (const_int 0)))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; See comment above `ashl<mode>3' about how this works.
+
+(define_expand "<shift_insn><mode>3"
+  [(set (match_operand:SDWIM 0 "<shift_operand>" "")
+	(any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>" "")
+			   (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+(define_insn "*<shift_insn><mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0")
+	  (subreg:QI
+	    (and:SI
+	      (match_operand:SI 2 "register_operand" "c")
+	      (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+  return "<shift>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<shift_insn><mode>3_doubleword"
+  [(set (match_operand:DWI 0 "register_operand" "=r")
+	(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
+			 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
+  [(const_int 0)]
+  "ix86_split_<shift_insn> (operands, NULL_RTX, <MODE>mode); DONE;"
+  [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DWImode
+;; values are manipulated, registers are already at a premium.  But if
+;; we have one handy, we won't turn it away.
+
+(define_peephole2
+  [(match_scratch:DWIH 3 "r")
+   (parallel [(set (match_operand:<DWI> 0 "register_operand" "")
+		   (any_shiftrt:<DWI>
+		     (match_operand:<DWI> 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_<shift_insn> (operands, operands[3], <DWI>mode); DONE;")
+
+(define_insn "x86_64_shrd"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+	(ior:DI (ashiftrt:DI (match_dup 0)
+		  (match_operand:QI 2 "nonmemory_operand" "Jc"))
+		(ashift:DI (match_operand:DI 1 "register_operand" "r")
+		  (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shrd"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+	(ior:SI (ashiftrt:SI (match_dup 0)
+		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
+		(ashift:SI (match_operand:SI 1 "register_operand" "r")
+		  (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn "ashrdi3_cvt"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
+	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
+		     (match_operand:QI 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && INTVAL (operands[2]) == 63
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "@
+   {cqto|cqo}
+   sar{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "DI")])
+
+(define_insn "ashrsi3_cvt"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+		     (match_operand:QI 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[2]) == 31
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   {cltd|cdq}
+   sar{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cvt_zext"
+  [(set (match_operand:DI 0 "register_operand" "=*d,r")
+	(zero_extend:DI
+	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+		       (match_operand:QI 2 "const_int_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && INTVAL (operands[2]) == 31
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   {cltd|cdq}
+   sar{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
"prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "SI")]) + +(define_expand "x86_shift_adj_3" + [(use (match_operand:SWI48 0 "register_operand" "")) + (use (match_operand:SWI48 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], + GEN_INT (GET_MODE_BITSIZE (mode)))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + emit_insn (gen_ashr3_cvt (operands[1], operands[1], + GEN_INT (GET_MODE_BITSIZE (mode)-1))); + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_insn "*3_1" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (, mode, operands)" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{}\t%0"; + else + return "{}\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "ishift") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "")]) + +(define_insn "*si3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "cI")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{l}\t%k0"; + else + return "{l}\t{%2, %k0|%k0, %2}"; +} + [(set_attr "type" "ishift") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "SI")]) + +(define_insn "*qi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (any_shiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "cI"))) + (clobber (reg:CC FLAGS_REG))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_REG_STALL + || (operands[1] == const1_rtx + && TARGET_SHIFT1))" +{ + if (operands[1] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{b}\t%0"; + else + return "{b}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "ishift1") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 1 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*3_cmp" + [(set (reg FLAGS_REG) + (compare + (any_shiftrt:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "" "")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (any_shiftrt:SWI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && TARGET_SHIFT1)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (, mode, operands)" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{}\t%0"; + else + return "{}\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "ishift") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "")]) + +(define_insn "*si3_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && TARGET_SHIFT1)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (, SImode, operands)" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{l}\t%k0"; + else + return "{l}\t{%2, %k0|%k0, %2}"; +} + [(set_attr "type" "ishift") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "SI")]) + +(define_insn "*3_cconly" + [(set (reg FLAGS_REG) + (compare + (any_shiftrt:SWI + (match_operand:SWI 1 "register_operand" "0") + (match_operand:QI 2 "" "")) + (const_int 0))) + (clobber (match_scratch:SWI 0 "="))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && TARGET_SHIFT1)) + && ix86_match_ccmode (insn, CCGOCmode)" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{}\t%0"; + else + return "{}\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "ishift") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "")]) + +;; Rotate instructions + +(define_expand "ti3" + [(set (match_operand:TI 0 "register_operand" "") + (any_rotate:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_64BIT" +{ + if (const_1_to_63_operand (operands[2], VOIDmode)) + emit_insn (gen_ix86_ti3_doubleword + (operands[0], operands[1], operands[2])); + else + FAIL; + + DONE; +}) + +(define_expand "di3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (any_rotate:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" +{ + if (TARGET_64BIT) + ix86_expand_binary_operator (, DImode, operands); + else if (const_1_to_31_operand (operands[2], VOIDmode)) + emit_insn (gen_ix86_di3_doubleword + (operands[0], 
+  else
+    FAIL;
+
+  DONE;
+})
+
+(define_expand "<rotate_insn><mode>3"
+  [(set (match_operand:SWIM124 0 "nonimmediate_operand" "")
+	(any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand" "")
+			    (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+(define_insn "*<rotate_insn><mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0")
+	  (subreg:QI
+	    (and:SI
+	      (match_operand:SI 2 "register_operand" "c")
+	      (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+  return "<rotate>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "<MODE>")])
+
+;; Implement rotation using two double-precision
+;; shift instructions and a scratch register.
+
+(define_insn_and_split "ix86_rotl<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+       (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+		     (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:DWIH 3 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+	 (ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
+		   (lshiftrt:DWIH (match_dup 5)
+				  (minus:QI (match_dup 6) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+	 (ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
+		   (lshiftrt:DWIH (match_dup 3)
+				  (minus:QI (match_dup 6) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
+
+(define_insn_and_split "ix86_rotr<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+       (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+		       (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:DWIH 3 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+	 (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2))
+		   (ashift:DWIH (match_dup 5)
+				(minus:QI (match_dup 6) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+	 (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2))
+		   (ashift:DWIH (match_dup 3)
+				(minus:QI (match_dup 6) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
+
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+			(match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
+  else
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "rotate")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
+			 (match_operand:QI 2 "nonmemory_operand" "cI"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{l}\t%k0";
+  else
+    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+}
+  [(set_attr "type" "rotate")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>qi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(any_rotate:QI (match_dup 0)
+		       (match_operand:QI 1 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_REG_STALL
+    || (operands[1] == const1_rtx
+	&& TARGET_SHIFT1))"
+{
+  if (operands[1] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{b}\t%0";
+  else
+    return "<rotate>{b}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "rotate1")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 1 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI")])
+
+(define_split
+  [(set (match_operand:HI 0 "register_operand" "")
+	(any_rotate:HI (match_dup 0) (const_int 8)))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
+  [(parallel [(set (strict_low_part (match_dup 0))
+		   (bswap:HI (match_dup 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+;; Bit set / bit test instructions
+
+(define_expand "extv"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(sign_extract:SI (match_operand:SI 1 "register_operand" "")
+			 (match_operand:SI 2 "const8_operand" "")
+			 (match_operand:SI 3 "const8_operand" "")))]
+  ""
+{
+  /* Handle extractions from %ah et al.  */
+  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+    FAIL;
+
+  /* From mips.md: extract_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[1], VOIDmode))
+    FAIL;
+})
+
+(define_expand "extzv"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extract:SI (match_operand 1 "ext_register_operand" "")
+			 (match_operand:SI 2 "const8_operand" "")
+			 (match_operand:SI 3 "const8_operand" "")))]
+  ""
+{
+  /* Handle extractions from %ah et al.  */
+  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+    FAIL;
+
+  /* From mips.md: extract_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[1], VOIDmode))
+    FAIL;
+})
+
+(define_expand "insv"
+  [(set (zero_extract (match_operand 0 "ext_register_operand" "")
+		      (match_operand 1 "const8_operand" "")
+		      (match_operand 2 "const8_operand" ""))
+	(match_operand 3 "register_operand" ""))]
+  ""
+{
+  rtx (*gen_mov_insv_1) (rtx, rtx);
+
+  /* Handle insertions to %ah et al.  */
+  if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
+    FAIL;
+
+  /* From mips.md: insert_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[0], VOIDmode))
+    FAIL;
+
+  gen_mov_insv_1 = (TARGET_64BIT
+		    ?
gen_movdi_insv_1 : gen_movsi_insv_1); + + emit_insn (gen_mov_insv_1 (operands[0], operands[3])); + DONE; +}) + +;; %%% bts, btr, btc, bt. +;; In general these instructions are *slow* when applied to memory, +;; since they enforce atomic operation. When applied to registers, +;; it depends on the cpu implementation. They're never faster than +;; the corresponding and/ior/xor operations, so with 32-bit there's +;; no point. But in 64-bit, we can't hold the relevant immediates +;; within the instruction itself, so operating on bits in the high +;; 32-bits of a register becomes easier. +;; +;; These are slow on Nocona, but fast on Athlon64. We do require the use +;; of btrq and btcq for corner cases of post-reload expansion of absdf and +;; negdf respectively, so they can never be disabled entirely. + +(define_insn "*btsq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 1)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "bts{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI")]) + +(define_insn "*btrq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btr{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI")]) + +(define_insn "*btcq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btc{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI")]) + +;; Allow Nocona to avoid these instructions if a register is available. 
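+
+;; (Editorial example, not part of the upstream sources: for a constant
+;; bit position N, the peepholes below rewrite
+;;   bts $N, %reg   as   or  $(1<<N), %reg
+;;   btr $N, %reg   as   and $~(1<<N), %reg
+;;   btc $N, %reg   as   xor $(1<<N), %reg
+;; moving the 64-bit mask into the scratch register first once it no
+;; longer fits a sign-extended 32-bit immediate.)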
+
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+		     (match_operand:DI 0 "register_operand" "")
+		     (const_int 1)
+		     (match_operand:DI 1 "const_0_to_63_operand" ""))
+		   (const_int 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (lo, hi, DImode);
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_iordi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+		     (match_operand:DI 0 "register_operand" "")
+		     (const_int 1)
+		     (match_operand:DI 1 "const_0_to_63_operand" ""))
+		   (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (~lo, ~hi, DImode);
+  if (i >= 32)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_anddi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+		     (match_operand:DI 0 "register_operand" "")
+		     (const_int 1)
+		     (match_operand:DI 1 "const_0_to_63_operand" ""))
+		   (not:DI (zero_extract:DI
+			     (match_dup 0) (const_int 1) (match_dup 1))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (lo, hi, DImode);
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+(define_insn "*bt<mode>"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SWI48
+	    (match_operand:SWI48 0 "register_operand" "r")
+	    (const_int 1)
+	    (match_operand:SWI48 1 "nonmemory_operand" "rN"))
+	  (const_int 0)))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "bt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "<MODE>")])
+
+;; Store-flag instructions.
+
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates cc0.  Generate an equality comparison if `seq' or `sne'.
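+
+;; (Editorial example, not part of the upstream sources: for
+;;
+;;   int ge (int a, int b) { return a >= b; }
+;;
+;; these patterns yield "cmpl %esi, %edi; setge %al; movzbl %al, %eax",
+;; i.e. a QImode setcc plus a zero extension, which is exactly the
+;; shape the splitters below produce.)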
+ +(define_insn_and_split "*setcc_di_1" + [(set (match_operand:DI 0 "register_operand" "=q") + (match_operator:DI 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]))] + "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (zero_extend:DI (match_dup 2)))] +{ + PUT_MODE (operands[1], QImode); + operands[2] = gen_lowpart (QImode, operands[0]); +}) + +(define_insn_and_split "*setcc_si_1_and" + [(set (match_operand:SI 0 "register_operand" "=q") + (match_operator:SI 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + PUT_MODE (operands[1], QImode); + operands[2] = gen_lowpart (QImode, operands[0]); +}) + +(define_insn_and_split "*setcc_si_1_movzbl" + [(set (match_operand:SI 0 "register_operand" "=q") + (match_operator:SI 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]))] + "!TARGET_PARTIAL_REG_STALL + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (zero_extend:SI (match_dup 2)))] +{ + PUT_MODE (operands[1], QImode); + operands[2] = gen_lowpart (QImode, operands[0]); +}) + +(define_insn "*setcc_qi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (match_operator:QI 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]))] + "" + "set%C1\t%0" + [(set_attr "type" "setcc") + (set_attr "mode" "QI")]) + +(define_insn "*setcc_qi_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (match_operator:QI 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]))] + "" + "set%C1\t%0" + [(set_attr "type" "setcc") + (set_attr "mode" "QI")]) + +;; In general it is not safe to assume too much about CCmode registers, +;; so simplify-rtx stops when it sees a second one. Under certain +;; conditions this is safe on x86, so help combine not create +;; +;; seta %al +;; testb %al, %al +;; sete %al + +(define_split + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ne:QI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] + "PUT_MODE (operands[1], QImode);") + +(define_split + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) + (ne:QI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] + "PUT_MODE (operands[1], QImode);") + +(define_split + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (eq:QI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + rtx new_op1 = copy_rtx (operands[1]); + operands[1] = new_op1; + PUT_MODE (new_op1, QImode); + PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1), + GET_MODE (XEXP (new_op1, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! 
ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+	(eq:QI (match_operator 1 "ix86_comparison_operator"
+		 [(reg FLAGS_REG) (const_int 0)])
+	       (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx new_op1 = copy_rtx (operands[1]);
+  operands[1] = new_op1;
+  PUT_MODE (new_op1, QImode);
+  PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+					     GET_MODE (XEXP (new_op1, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
+;; subsequent logical operations are used to imitate conditional moves.
+;; 0xffffffff is NaN, but not in normalized form, so we can't represent
+;; it directly.
+
+(define_insn "*avx_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 1 "avx_comparison_float_operator"
+	  [(match_operand:MODEF 2 "register_operand" "x")
+	   (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "TARGET_AVX"
+  "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "prefix" "vex")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 1 "sse_comparison_operator"
+	  [(match_operand:MODEF 2 "register_operand" "0")
+	   (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "<MODE>")])
+
+;; Basic conditional jump instructions.
+;; We ignore the overflow flag for signed branch instructions.
+
+(define_insn "*jcc_1"
+  [(set (pc)
+	(if_then_else (match_operator 1 "ix86_comparison_operator"
+			[(reg FLAGS_REG) (const_int 0)])
+		      (label_ref (match_operand 0 "" ""))
+		      (pc)))]
+  ""
+  "%+j%C1\t%l0"
+  [(set_attr "type" "ibr")
+   (set_attr "modrm" "0")
+   (set (attr "length")
+     (if_then_else (and (ge (minus (match_dup 0) (pc))
+			    (const_int -126))
+			(lt (minus (match_dup 0) (pc))
+			    (const_int 128)))
+       (const_int 2)
+       (const_int 6)))])
+
+(define_insn "*jcc_2"
+  [(set (pc)
+	(if_then_else (match_operator 1 "ix86_comparison_operator"
+			[(reg FLAGS_REG) (const_int 0)])
+		      (pc)
+		      (label_ref (match_operand 0 "" ""))))]
+  ""
+  "%+j%c1\t%l0"
+  [(set_attr "type" "ibr")
+   (set_attr "modrm" "0")
+   (set (attr "length")
+     (if_then_else (and (ge (minus (match_dup 0) (pc))
+			    (const_int -126))
+			(lt (minus (match_dup 0) (pc))
+			    (const_int 128)))
+       (const_int 2)
+       (const_int 6)))])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one.  Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;; seta	%al
+;; testb	%al, %al
+;; je	Lfoo
+
+(define_split
+  [(set (pc)
+	(if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+			    [(reg FLAGS_REG) (const_int 0)])
+			  (const_int 0))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))]
+  ""
+  [(set (pc)
+	(if_then_else (match_dup 0)
+		      (label_ref (match_dup 1))
+		      (pc)))]
+  "PUT_MODE (operands[0], VOIDmode);")
+
+(define_split
+  [(set (pc)
+	(if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+			    [(reg FLAGS_REG) (const_int 0)])
+			  (const_int 0))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))]
+  ""
+  [(set (pc)
+	(if_then_else (match_dup 0)
+		      (label_ref (match_dup 1))
+		      (pc)))]
+{
+  rtx new_op0 = copy_rtx (operands[0]);
+  operands[0] = new_op0;
+  PUT_MODE (new_op0, VOIDmode);
+  PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0),
+					     GET_MODE (XEXP (new_op0, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op0, VOIDmode))
+    FAIL;
+})
+
+;; zero_extend in SImode is correct also for DImode, since this is what combine
+;; pass generates from shift insn with QImode operand.  Actually, the mode
+;; of operand 2 (bit offset operand) doesn't matter since bt insn takes
+;; appropriate modulo of the bit offset value.
+
+(define_insn_and_split "*jcc_bt<mode>"
+  [(set (pc)
+	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(zero_extract:SWI48
+			   (match_operand:SWI48 1 "register_operand" "r")
+			   (const_int 1)
+			   (zero_extend:SI
+			     (match_operand:QI 2 "register_operand" "r")))
+			 (const_int 0)])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SWI48
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 3))
+		      (pc)))]
+{
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], QImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; Avoid useless masking of bit offset operand.  "and" in SImode is correct
+;; also for DImode, this is what combine produces.
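+
+;; (Editorial example, not part of the upstream sources: the masking is
+;; redundant because bt reduces the offset modulo the operand width, so
+;; for
+;;
+;;   if (x & (1u << (n & 31))) goto l;
+;;
+;; "btl %esi, %edi; jc l" is already correct without an "andl $31".)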
+(define_insn_and_split "*jcc_bt_mask" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand" "r") + (const_int 1) + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")))]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) + == GET_MODE_BITSIZE (mode)-1" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (mode, operands[2], SImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +(define_insn_and_split "*jcc_btsi_1" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +(define_insn_and_split "*jcc_btsi_mask_1" + [(set (pc) + (if_then_else + (match_operator 0 "bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 "register_operand" "r") + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) 0)) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x1f) == 0x1f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] + "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));") + +;; Define combination compare-and-branch fp compare instructions to help +;; combine. 
+ +(define_insn "*fp_jcc_1_387" + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && !TARGET_CMOVE" + "#") + +(define_insn "*fp_jcc_1r_387" + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && !TARGET_CMOVE" + "#") + +(define_insn "*fp_jcc_2_387" + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !TARGET_CMOVE" + "#") + +(define_insn "*fp_jcc_2r_387" + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !TARGET_CMOVE" + "#") + +(define_insn "*fp_jcc_3_387" + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && !TARGET_CMOVE" + "#") + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "nonimmediate_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], NULL_RTX, NULL_RTX); + DONE; +}) + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "general_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a"))] + 
"reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], operands[5], NULL_RTX); + DONE; +}) + +;; The order of operands in *fp_jcc_4_387 is forced by combine in +;; simplify_comparison () function. Float operator is treated as RTX_OBJ +;; with a precedence over other operators and is always put in the first +;; place. Swap condition and operands to match ficom instruction. + +(define_insn "*fp_jcc_4__387" + [(set (pc) + (if_then_else + (match_operator 0 "ix86_swapped_fp_comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")]) + (match_operand 3 "register_operand" "f,f")]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a,a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[3])) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[1]) == GET_MODE (operands[3]) + && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode + && !TARGET_CMOVE" + "#") + +(define_split + [(set (pc) + (if_then_else + (match_operator 0 "ix86_swapped_fp_comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]); + + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], NULL_RTX); + DONE; +}) + +;; %%% Kill this when reload knows how to do it. +(define_split + [(set (pc) + (if_then_else + (match_operator 0 "ix86_swapped_fp_comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "register_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]); + + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], operands[2]); + DONE; +}) + +;; Unconditional and other jump instructions + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "jmp\t%l0" + [(set_attr "type" "ibr") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 5))) + (set_attr "modrm" "0")]) + +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "nonimmediate_operand" ""))] + "" + "") + +(define_insn "*indirect_jump" + [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))] + "" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) + relative. 
Convert the relative address to an absolute address. */ + if (flag_pic) + { + rtx op0, op1; + enum rtx_code code; + + /* We can't use @GOTOFF for text labels on VxWorks; + see gotoff_operand. */ + if (TARGET_64BIT || TARGET_VXWORKS_RTP) + { + code = PLUS; + op0 = operands[0]; + op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + } + else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) + { + code = PLUS; + op0 = operands[0]; + op1 = pic_offset_table_rtx; + } + else + { + code = MINUS; + op0 = pic_offset_table_rtx; + op1 = operands[0]; + } + + operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, + OPTAB_DIRECT); + } +}) + +(define_insn "*tablejump_1" + [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +;; Convert setcc + movzbl to xor + setcc if operands don't overlap. + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1)))] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Similar, but match zero_extendhisi2_and, which adds a clobber. + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (parallel [(set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Call instructions. + +;; The predicates normally associated with named expanders are not properly +;; checked for calls. This is a bug in the generic code, but it isn't that +;; easy to fix. Ignore it for now and be prepared to fix things up. + +;; P6 processors will jump to the address after the decrement when %esp +;; is used as a call operand, so they will execute return address as a code. +;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17. + +;; Call subroutine returning no value. 
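+
+;; (Editorial example, not part of the upstream sources: call_pop models
+;; callee-popped arguments.  For
+;;
+;;   int __attribute__((stdcall)) f (int a, int b);
+;;
+;; the callee returns with "ret $8", so the call and the matching %esp
+;; adjustment are expressed together in one parallel.)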
+ +(define_expand "call_pop" + [(parallel [(call (match_operand:QI 0 "" "") + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "" "")))])] + "!TARGET_64BIT" +{ + ix86_expand_call (NULL, operands[0], operands[1], + operands[2], operands[3], 0); + DONE; +}) + +(define_insn_and_split "*call_pop_0_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "")))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*call_pop_0" + [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; +} + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_pop_1_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i")))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*call_pop_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn_and_split "*sibcall_pop_1_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i,i")))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_pop_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i,i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P0 + jmp\t%A0" + [(set_attr "type" "call")]) + +(define_expand "call" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0); + DONE; +}) + +(define_expand "sibcall" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1); + DONE; +}) + +(define_insn_and_split "*call_0_vzeroupper" + [(call 
(mem:QI (match_operand 0 "constant_call_address_operand" "")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*call_0" + [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) + (match_operand 1 "" ""))] + "" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*call_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) + (match_operand 1 "" ""))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*sibcall_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) + (match_operand 1 "" ""))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_rex64_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) + (match_operand 1 "" ""))] + "TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) + (match_operand 1 "" "")) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper 
(curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64_ms_sysv" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) + (match_operand 1 "" "")) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_rex64_large_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64_large" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) + (match_operand 1 "" ""))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*sibcall_1_rex64_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "z,U")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1_rex64" + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "z,U")) + (match_operand 1 "" ""))] + "TARGET_64BIT && SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +;; Call subroutine, returning value in operand 0 +(define_expand "call_value_pop" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 4 "" "")))])] + "!TARGET_64BIT" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], operands[4], 0); + DONE; +}) + +(define_expand "call_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 3 is not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], NULL, 0); + DONE; +}) + +(define_expand "sibcall_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 3 is not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], NULL, 1); + DONE; +}) + +;; Call subroutine returning any type. 
+ +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" +{ + int i; + + /* In order to give reg-stack an easier job in validating two + coprocessor registers as containing a possible return value, + simply pretend the untyped call returns a complex long double + value. + + We can't use SSE_REGPARM_MAX here since callee is unprototyped + and should have the default ABI. */ + + ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 + ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), + operands[0], const0_rtx, + GEN_INT ((TARGET_64BIT + ? (ix86_abi == SYSV_ABI + ? X86_64_SSE_REGPARM_MAX + : X86_64_MS_SSE_REGPARM_MAX) + : X86_32_SSE_REGPARM_MAX) + - 1), + NULL, 0); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; Prologue and epilogue instructions + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Do not schedule instructions accessing memory across this point. + +(define_expand "memory_blockage" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_blockage" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" + "" + [(set_attr "length" "0")]) + +;; As USE insns aren't meaningful after reload, this is used instead +;; to prevent deleting instructions setting registers for PIC code +(define_insn "prologue_use" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)] + "" + "" + [(set_attr "length" "0")]) + +;; Insn emitted into the body of a function to return from a function. +;; This is only done if the function's epilogue is known to be simple. +;; See comments for ix86_can_use_return_insn_p in i386.c. + +(define_expand "return" + [(return)] + "ix86_can_use_return_insn_p ()" +{ + if (crtl->args.pops_args) + { + rtx popc = GEN_INT (crtl->args.pops_args); + emit_jump_insn (gen_return_pop_internal (popc)); + DONE; + } +}) + +(define_insn "return_internal" + [(return)] + "reload_completed" + "ret" + [(set_attr "length" "1") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET +;; instruction Athlon and K8 have. 
+ +(define_insn "return_internal_long" + [(return) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" + "rep\;ret" + [(set_attr "length" "2") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "0") + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + +(define_insn "return_pop_internal" + [(return) + (use (match_operand:SI 0 "const_int_operand" ""))] + "reload_completed" + "ret\t%0" + [(set_attr "length" "3") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "2") + (set_attr "modrm" "0")]) + +(define_insn "return_indirect_internal" + [(return) + (use (match_operand:SI 0 "register_operand" "r"))] + "reload_completed" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Generate nops. Operand 0 is the number of nops, up to 8. +(define_insn "nops" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] + UNSPECV_NOPS)] + "reload_completed" +{ + int num = INTVAL (operands[0]); + + gcc_assert (num >= 1 && num <= 8); + + while (num--) + fputs ("\tnop\n", asm_out_file); + + return ""; +} + [(set (attr "length") (symbol_ref "INTVAL (operands[0])")) + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Pad to 16-byte boundary, max skip in op0. Used to avoid +;; branch prediction penalty for the third jump in a 16-byte +;; block on K8. + +(define_insn "pad" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)] + "" +{ +#ifdef ASM_OUTPUT_MAX_SKIP_PAD + ASM_OUTPUT_MAX_SKIP_PAD (asm_out_file, 4, (int)INTVAL (operands[0])); +#else + /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that. + The align insn is used to avoid 3 jump instructions in the row to improve + branch prediction and the benefits hardly outweigh the cost of extra 8 + nops on the average inserted by full alignment pseudo operation. 
*/ +#endif + return ""; +} + [(set_attr "length" "16")]) + +(define_expand "prologue" + [(const_int 0)] + "" + "ix86_expand_prologue (); DONE;") + +(define_insn "set_got" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "* return output_set_got (operands[0], NULL_RTX);" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_labelled" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "* return output_set_got (operands[0], operands[1]);" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] + "TARGET_64BIT" + "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}" + [(set_attr "type" "lea") + (set_attr "length_address" "4") + (set_attr "mode" "DI")]) + +(define_insn "set_rip_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(label_ref (match_operand 1 "" ""))] UNSPEC_SET_RIP))] + "TARGET_64BIT" + "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}" + [(set_attr "type" "lea") + (set_attr "length_address" "4") + (set_attr "mode" "DI")]) + +(define_insn "set_got_offset_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT_OFFSET))] + "TARGET_64BIT" + "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}" + [(set_attr "type" "imov") + (set_attr "length_immediate" "0") + (set_attr "length_address" "8") + (set_attr "mode" "DI")]) + +(define_expand "epilogue" + [(const_int 0)] + "" + "ix86_expand_epilogue (1); DONE;") + +(define_expand "sibcall_epilogue" + [(const_int 0)] + "" + "ix86_expand_epilogue (0); DONE;") + +(define_expand "eh_return" + [(use (match_operand 0 "register_operand" ""))] + "" +{ + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; + + /* Tricky bit: we write the address of the handler to which we will + be returning into someone else's stack frame, one word below the + stack address we wish to restore. */ + tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa); + tmp = plus_constant (tmp, -UNITS_PER_WORD); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (tmp, ra); + + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 0)] + "ix86_expand_epilogue (2); DONE;") + +(define_insn "leave" + [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4))) + (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +(define_insn "leave_rex64" + [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8))) + (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +;; Handle -fsplit-stack. + +(define_expand "split_stack_prologue" + [(const_int 0)] + "" +{ + ix86_expand_split_stack_prologue (); + DONE; +}) + +;; In order to support the call/return predictor, we use a return +;; instruction which the middle-end doesn't see. 
+(define_insn "split_stack_return" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] + UNSPECV_SPLIT_STACK_RETURN)] + "" +{ + if (operands[0] == const0_rtx) + return "ret"; + else + return "ret\t%0"; +} + [(set_attr "atom_unit" "jeu") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (match_operand:SI 0 "const0_operand" "") + (const_int 1) + (const_int 3))) + (set (attr "length_immediate") + (if_then_else (match_operand:SI 0 "const0_operand" "") + (const_int 0) + (const_int 2)))]) + +;; If there are operand 0 bytes available on the stack, jump to +;; operand 1. + +(define_expand "split_stack_space_check" + [(set (pc) (if_then_else + (ltu (minus (reg SP_REG) + (match_operand 0 "register_operand" "")) + (unspec [(const_int 0)] UNSPEC_STACK_CHECK)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" +{ + rtx reg, size, limit; + + reg = gen_reg_rtx (Pmode); + size = force_reg (Pmode, operands[0]); + emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, size)); + limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_STACK_CHECK); + limit = gen_rtx_MEM (Pmode, gen_rtx_CONST (Pmode, limit)); + ix86_expand_branch (GEU, reg, limit, operands[1]); + + DONE; +}) + +;; Bit manipulation instructions. + +(define_expand "ffs2" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (match_operand:SWI48 1 "nonimmediate_operand" "") + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "") + (ctz:SWI48 (match_dup 1)))]) + (set (match_dup 0) (if_then_else:SWI48 + (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (mode == SImode && !TARGET_CMOVE) + { + emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1])); + DONE; + } + operands[2] = gen_reg_rtx (mode); +}) + +(define_insn_and_split "ffssi2_no_cmove" + [(set (match_operand:SI 0 "register_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&q")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_CMOVE" + "#" + "&& reload_completed" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (strict_low_part (match_dup 3)) + (eq:QI (reg:CCZ FLAGS_REG) (const_int 0))) + (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[3] = gen_lowpart (QImode, operands[2]); + ix86_expand_clear (operands[2]); +}) + +(define_insn "*ffs_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 (match_dup 1)))] + "" + "bsf{}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "")]) + +(define_insn "ctz2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_BMI) + return "tzcnt{}\t{%1, %0|%0, %1}"; + else + return "bsf{}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set (attr "prefix_rep") (symbol_ref "TARGET_BMI")) + (set_attr "mode" "")]) + 
+(define_expand "clz2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand" "") + (minus:SWI248 + (match_dup 2) + (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:SWI248 (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_ABM) + { + emit_insn (gen_clz2_abm (operands[0], operands[1])); + DONE; + } + operands[2] = GEN_INT (GET_MODE_BITSIZE (mode)-1); +}) + +(define_insn "clz2_abm" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM || TARGET_BMI" + "lzcnt{}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +;; BMI instructions. +(define_insn "*bmi_andn_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (not:SWI48 + (match_operand:SWI48 1 "register_operand" "r")) + (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "andn\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "bmi_bextr_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "register_operand" "r")] + UNSPEC_BEXTR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "bextr\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*bmi_blsi_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (neg:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsi\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*bmi_blsmsk_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (xor:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*bmi_blsr_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsr\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +;; TBM instructions. 
+(define_insn "tbm_bextri_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "const_0_to_255_operand" "n") + (match_operand:SWI48 3 "const_0_to_255_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3])); + return "bextr\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_blcfill_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcfill\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_blci_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (not:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1))) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blci\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_blcic_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_blcmsk_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (xor:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_blcs_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcs\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_blsfill_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blsfill\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_blsic_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blsic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_t1mskc_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "t1mskc\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*tbm_tzmsk_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "tzmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") 
+ (set_attr "mode" "")]) + +(define_insn "bsr_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsr{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI")]) + +(define_insn "bsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "SI")]) + +(define_insn "*bsrhi" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "HI")]) + +(define_insn "popcount2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*popcount2_cmp" + [(set (reg FLAGS_REG) + (compare + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 (match_dup 1)))] + "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*popcountsi2_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI(popcount:SI (match_dup 1))))] + "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{l}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +(define_expand "bswap2" + [(set (match_operand:SWI48 0 "register_operand" "") + (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "")))] + "" +{ + if (mode == SImode && !(TARGET_BSWAP || TARGET_MOVBE)) + { + rtx x = operands[0]; + + emit_move_insn (x, operands[1]); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + emit_insn (gen_rotlsi3 (x, x, GEN_INT (16))); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + DONE; + } +}) + +(define_insn "*bswap2_movbe" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m") + (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))] + "TARGET_MOVBE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + bswap\t%0 + movbe\t{%1, %0|%0, %1} + movbe\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip,imov,imov") + (set_attr "modrm" "0,1,1") + (set_attr "prefix_0f" "*,1,1") + (set_attr "prefix_extra" "*,1,1") + (set_attr "mode" "")]) + +(define_insn "*bswap2_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))] + "TARGET_BSWAP" + "bswap\t%0" + 
[(set_attr "type" "bitmanip") + (set_attr "modrm" "0") + (set_attr "mode" "")]) + +(define_insn "*bswaphi_lowpart_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)" + "@ + xchg{b}\t{%h0, %b0|%b0, %h0} + rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "2,4") + (set_attr "mode" "QI,HI")]) + +(define_insn "bswaphi_lowpart" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "4") + (set_attr "mode" "HI")]) + +(define_expand "paritydi2" + [(set (match_operand:DI 0 "register_operand" "") + (parity:DI (match_operand:DI 1 "register_operand" "")))] + "! TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; + + emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX, + NULL_RTX, operands[1])); + + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + if (TARGET_64BIT) + emit_insn (gen_zero_extendqidi2 (operands[0], scratch)); + else + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (tmp, scratch)); + emit_insn (gen_zero_extendsidi2 (operands[0], tmp)); + } + DONE; +}) + +(define_expand "paritysi2" + [(set (match_operand:SI 0 "register_operand" "") + (parity:SI (match_operand:SI 1 "register_operand" "")))] + "! TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; + + emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1])); + + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + emit_insn (gen_zero_extendqisi2 (operands[0], scratch)); + DONE; +}) + +(define_insn_and_split "paritydi2_cmp" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:DI 3 "register_operand" "0")] + UNSPEC_PARITY)) + (clobber (match_scratch:DI 0 "=r")) + (clobber (match_scratch:SI 1 "=&r")) + (clobber (match_scratch:HI 2 "=Q"))] + "! TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 1)] UNSPEC_PARITY)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] +{ + operands[4] = gen_lowpart (SImode, operands[3]); + + if (TARGET_64BIT) + { + emit_move_insn (operands[1], gen_lowpart (SImode, operands[3])); + emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32))); + } + else + operands[1] = gen_highpart (SImode, operands[3]); +}) + +(define_insn_and_split "paritysi2_cmp" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:SI 2 "register_operand" "0")] + UNSPEC_PARITY)) + (clobber (match_scratch:SI 0 "=r")) + (clobber (match_scratch:HI 1 "=&Q"))] + "! 
TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:HI (match_dup 1) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 1)] UNSPEC_PARITY)) + (clobber (match_dup 1))])] +{ + operands[3] = gen_lowpart (HImode, operands[2]); + + emit_move_insn (operands[1], gen_lowpart (HImode, operands[2])); + emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16))); +}) + +(define_insn "*parityhi2_cmp" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:HI 1 "register_operand" "0")] + UNSPEC_PARITY)) + (clobber (match_scratch:HI 0 "=Q"))] + "! TARGET_POPCNT" + "xor{b}\t{%h0, %b0|%b0, %h0}" + [(set_attr "length" "2") + (set_attr "mode" "HI")]) + +;; Thread-local storage patterns for ELF. +;; +;; Note that these code sequences must appear exactly as shown +;; in order to allow linker relaxation. + +(define_insn "*tls_global_dynamic_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%a2@tlsgd(,%1,1), %0|%0, %a2@tlsgd[%1*1]}\;call\t%P3" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_expand "tls_global_dynamic_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(match_dup 2) + (match_operand:SI 1 "tls_symbolic_operand" "") + (match_dup 3)] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "")) + (clobber (match_scratch:SI 5 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (flag_pic) + operands[2] = pic_offset_table_rtx; + else + { + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[2])); + } + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], operands[1], operands[2])); + DONE; + } + operands[3] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_global_dynamic_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" "")) + (match_operand:DI 3 "" ""))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)] + "TARGET_64BIT" + { return ASM_BYTE "0x66\n\tlea{q}\t{%a1@tlsgd(%%rip), %%rdi|rdi, %a1@tlsgd[rip]}\n" ASM_SHORT "0x6666\n\trex64\n\tcall\t%P2"; } + [(set_attr "type" "multi") + (set_attr "length" "16")]) + +(define_expand "tls_global_dynamic_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call:DI (mem:QI (match_dup 2)) (const_int 0))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)])] + "" +{ + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "11")]) + +(define_expand "tls_local_dynamic_base_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + 
(unspec:SI [(match_dup 1) (match_dup 2)]
+                              UNSPEC_TLS_LD_BASE))
+              (clobber (match_scratch:SI 3 ""))
+              (clobber (match_scratch:SI 4 ""))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (flag_pic)
+    operands[1] = pic_offset_table_rtx;
+  else
+    {
+      operands[1] = gen_reg_rtx (Pmode);
+      emit_insn (gen_set_got (operands[1]));
+    }
+  if (TARGET_GNU2_TLS)
+    {
+      emit_insn (gen_tls_dynamic_gnu2_32
+                 (operands[0], ix86_tls_module_base (), operands[1]));
+      DONE;
+    }
+  operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+        (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
+                 (match_operand:DI 2 "" "")))
+   (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+  "TARGET_64BIT"
+  "lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}\;call\t%P1"
+  [(set_attr "type" "multi")
+   (set_attr "length" "12")])
+
+(define_expand "tls_local_dynamic_base_64"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+                   (call:DI (mem:QI (match_dup 1)) (const_int 0)))
+              (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
+  ""
+{
+  if (TARGET_GNU2_TLS)
+    {
+      emit_insn (gen_tls_dynamic_gnu2_64
+                 (operands[0], ix86_tls_module_base ()));
+      DONE;
+    }
+  operands[1] = ix86_tls_get_addr ();
+})
+
+;; Local dynamic of a single variable is a lose.  Show combine how
+;; to convert that back to global dynamic.
+
+(define_insn_and_split "*tls_local_dynamic_32_once"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+        (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+                             (match_operand:SI 2 "call_insn_operand" "")]
+                            UNSPEC_TLS_LD_BASE)
+                 (const:SI (unspec:SI
+                            [(match_operand:SI 3 "tls_symbolic_operand" "")]
+                            UNSPEC_DTPOFF))))
+   (clobber (match_scratch:SI 4 "=d"))
+   (clobber (match_scratch:SI 5 "=c"))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+                   (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+                              UNSPEC_TLS_GD))
+              (clobber (match_dup 4))
+              (clobber (match_dup 5))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Segment register for the thread base ptr load
+(define_mode_attr tp_seg [(SI "gs") (DI "fs")])
+
+;; Load and add the thread base pointer from %gs:0.
+(define_insn "*load_tp_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec:P [(const_int 0)] UNSPEC_TP))]
+  ""
+  "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (plus:P (unspec:P [(const_int 0)] UNSPEC_TP)
+                (match_operand:P 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "add{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
+  [(set_attr "type" "alu")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
+;; The Sun linker took the AMD64 TLS spec literally and can only handle
+;; %rax as destination of the initial executable code sequence.
+(define_insn "tls_initial_exec_64_sun"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+        (unspec:DI
+         [(match_operand:DI 1 "tls_symbolic_operand" "")]
+         UNSPEC_TLS_IE_SUN))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_SUN_TLS"
+  "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}\n\tadd{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"
+  [(set_attr "type" "multi")])
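+
+;; As an aside, the two *load_tp_/*add_tp_ patterns above are what a
+;; plain thread-pointer read compiles to; an equivalent sketch in C with
+;; inline asm (our own illustration, not part of the upstream file):
+;;
+;;   void *tp;
+;;   #ifdef __x86_64__
+;;     __asm__ ("movq %%fs:0, %0" : "=r" (tp));   /* *load_tp_di */
+;;   #else
+;;     __asm__ ("movl %%gs:0, %0" : "=r" (tp));   /* *load_tp_si */
+;;   #endif
+;;
+;; since on GNU/Linux the first word of the TCB holds the thread
+;; pointer itself.
+
+;; GNU2 TLS patterns can be split.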
+ +(define_expand "tls_dynamic_gnu2_32" + [(set (match_dup 3) + (plus:SI (match_operand:SI 2 "register_operand" "") + (const:SI + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)))) + (parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 3) + (match_dup 2) (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +(define_insn "*tls_dynamic_lea_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "b") + (const:SI + (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC))))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}" + [(set_attr "type" "lea") + (set_attr "mode" "SI") + (set_attr "length" "6") + (set_attr "length_address" "4")]) + +(define_insn "*tls_dynamic_call_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "") + (match_operand:SI 2 "register_operand" "0") + ;; we have to make sure %ebx still points to the GOT + (match_operand:SI 3 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +(define_insn_and_split "*tls_dynamic_gnu2_combine_32" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI + (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "") + (match_operand:SI 4 "" "") + (match_operand:SI 2 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC) + (const:SI (unspec:SI + [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 5))] +{ + operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; + emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); +}) + +(define_expand "tls_dynamic_gnu2_64" + [(set (match_dup 2) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)) + (parallel + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx (Pmode) : operands[0];
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+                   UNSPEC_TLSDESC))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")
+   (set_attr "length" "7")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+        (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+                    (match_operand:DI 2 "register_operand" "0")
+                    (reg:DI SP_REG)]
+                   UNSPEC_TLSDESC))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+  [(set (match_operand:DI 0 "register_operand" "=&a")
+        (plus:DI
+          (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+                      (match_operand:DI 3 "" "")
+                      (reg:DI SP_REG)]
+                     UNSPEC_TLSDESC)
+          (const:DI (unspec:DI
+                     [(match_operand:DI 1 "tls_symbolic_operand" "")]
+                     UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 4))]
+{
+  operands[4] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
+  emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;; These patterns match the binary 387 instructions for addM3, subM3,
+;; mulM3 and divM3.  There are three patterns for each of DFmode and
+;; SFmode.  The first is the normal insn, the second the same insn but
+;; with one operand a conversion, and the third the same insn but with
+;; the other operand a conversion.  The conversion may be SFmode or
+;; SImode if the target mode is DFmode, but only SImode if the target
+;; mode is SFmode.
+
+;; GCC is slightly smarter about handling normal two-address instructions,
+;; so use special patterns for add and mul.
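+
+;; For instance, the "one operand a conversion" forms let a C function
+;; such as the following (our own example) feed the integer operand to
+;; the 387 op straight from memory instead of via a separate fild:
+;;
+;;   double scale (double d, int i) { return d * i; }
+;;
+;; With -mfpmath=387 this can match the *fop_df_3_i387 instantiation
+;; below and emit "fimull" on the integer operand, when the
+;; TARGET_USE_*MODE_FIOP tuning (or optimizing for size) allows it.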
+ +(define_insn "*fop__comm_mixed_avx" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "")]) + +(define_insn "*fop__comm_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "mode" "")]) + +(define_insn "*fop__comm_avx" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*fop__comm_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "mode" "")]) + +(define_insn "*fop__comm_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 && X87_ENABLE_ARITH (mode) + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*fop__1_mixed_avx" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 
"nonimmediate_operand" "0,fm,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "prefix" "orig,orig,maybe_vex") + (set_attr "mode" "")]) + +(define_insn "*fop__1_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*rcpsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP))] + "TARGET_SSE_MATH" + "%vrcpss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +(define_insn "*fop__1_avx" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*fop__1_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "mode" "")]) + +;; This pattern is not fully shadowed by the pattern above. 
+(define_insn "*fop__1_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && X87_ENABLE_ARITH (mode) + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +;; ??? Add SSE splitters for these! +(define_insn "*fop__2_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(float:MODEF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:MODEF 2 "register_operand" "0,0")]))] + "TARGET_80387 && X87_ENABLE_FLOAT (mode, mode) + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop__3_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0,0") + (float:MODEF + (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && X87_ENABLE_FLOAT (mode, mode) + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop_df_4_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + (match_operand:DF 2 "register_operand" "0,f")]))] + "TARGET_80387 && X87_ENABLE_ARITH (DFmode) + && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_5_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0,f") + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && X87_ENABLE_ARITH (DFmode) + && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_6_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "register_operand" "0,f")) + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && X87_ENABLE_ARITH (DFmode) + && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_xf_comm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "%0") + (match_operand:XF 2 "register_operand" "f")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_1_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (match_operand:XF 2 "register_operand" "f,0")]))] + "TARGET_80387 + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_2_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float:XF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:XF 2 "register_operand" "0,0")]))] + "TARGET_80387 && 
(TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(match_operand:XF 1 "register_operand" "0,0")
+           (float:XF
+             (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(float_extend:XF
+             (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
+           (match_operand:XF 2 "register_operand" "0,f")]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_5_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(match_operand:XF 1 "register_operand" "0,f")
+           (float_extend:XF
+             (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_6_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(float_extend:XF
+             (match_operand:MODEF 1 "register_operand" "0,f"))
+           (float_extend:XF
+             (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
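+;; The two splitters that follow push the integer operand of a
+;; float-plus-int 387 operation back into memory, because fiadd/fisub/
+;; fimul/fidiv only accept a memory operand.  In C terms (our own
+;; example, not part of the upstream file):
+;;
+;;   double f (double d, int i) { return d + i; }
+;;
+;; typically ends up as "fiaddl" on a stack slot after the split, with
+;; ix86_force_to_memory providing the slot.
+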
[(match_operand 1 "register_operand" "") + (float (match_operand:X87MODEI12 2 "register_operand" ""))]))] + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && X87_ENABLE_FLOAT (GET_MODE (operands[0]), GET_MODE (operands[2]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[1], + operands[4]))); + ix86_free_from_memory (GET_MODE (operands[2])); + DONE; +}) + +;; FPU special functions. + +;; This pattern implements a no-op XFmode truncation for +;; all fancy i386 XFmode math functions. + +(define_insn "truncxf2_i387_noop_unspec" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_TRUNC_NOOP))] + "TARGET_USE_FANCY_MATH_387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "")]) + +(define_insn "sqrtxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct")]) + +(define_insn "sqrt_extendxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF + (float_extend:XF + (match_operand:MODEF 1 "register_operand" "0"))))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct")]) + +(define_insn "*rsqrtsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" + "%vrsqrtss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +(define_expand "rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" +{ + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1); + DONE; +}) + +(define_insn "*sqrt2_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "%vsqrts\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "*") + (set_attr "amdfam10_decode" "*") + (set_attr "bdver1_decode" "*")]) + +(define_expand "sqrt2" + [(set (match_operand:MODEF 0 "register_operand" "") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "")))] + "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (mode)) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + if (mode == SFmode + && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun) + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0); + DONE; + } + + if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = force_reg (mode, operands[1]); + + emit_insn 
(gen_sqrt_extendxf2_i387 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0)); + DONE; + } +}) + +(define_insn "fpremxf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM_U)) + (set (reg:CCFP FPSR_REG) + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] + "TARGET_USE_FANCY_MATH_387" + "fprem" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "fmodxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "general_operand" "")) + (use (match_operand:XF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_move_insn (op2, operands[2]); + emit_move_insn (op1, operands[1]); + + emit_label (label); + emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_expand "fmod3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx (*gen_truncxf) (rtx, rtx); + + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_extendxf2 (op1, operands[1])); + + emit_label (label); + emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + /* Truncate the result properly for strict SSE math. 
*/ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !TARGET_MIX_SSE_I387) + gen_truncxf = gen_truncxf2; + else + gen_truncxf = gen_truncxf2_i387_noop_unspec; + + emit_insn (gen_truncxf (operands[0], op1)); + DONE; +}) + +(define_insn "fprem1xf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM1_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM1_U)) + (set (reg:CCFP FPSR_REG) + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] + "TARGET_USE_FANCY_MATH_387" + "fprem1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "remainderxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "general_operand" "")) + (use (match_operand:XF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_move_insn (op2, operands[2]); + emit_move_insn (op1, operands[1]); + + emit_label (label); + emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_expand "remainder3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx (*gen_truncxf) (rtx, rtx); + + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_extendxf2 (op1, operands[1])); + + emit_label (label); + + emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + /* Truncate the result properly for strict SSE math. 
*/
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !TARGET_MIX_SSE_I387)
+    gen_truncxf = gen_truncxf<mode>2;
+  else
+    gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+
+  emit_insn (gen_truncxf (operands[0], op1));
+  DONE;
+})
+
+(define_insn "*sinxf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*sin_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))]
+		   UNSPEC_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cosxf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cos_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))]
+		   UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; When the sincos pattern is defined, the sin and cos builtin functions
+;; are expanded to the sincos pattern with one of its outputs left unused.
+;; The CSE pass will figure out if two sincos patterns can be combined;
+;; otherwise the sincos pattern is split back into a sin or cos pattern,
+;; depending on which output is unused.
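+;;
+;; As a worked illustration (the function below is a made-up example, not
+;; anything referenced by these patterns): compiled with
+;; -funsafe-math-optimizations, both calls expand to sincos patterns, and
+;; CSE merges them so a single fsincos produces both results.
+;;
+;;   #include <math.h>
+;;
+;;   void polar (double a, double *s, double *c)
+;;   {
+;;     *s = sin (a);   /* sincos pattern, cos result initially unused */
+;;     *c = cos (a);   /* sincos pattern, sin result initially unused */
+;;   }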
+ +(define_insn "sincosxf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]) + +(define_insn "sincos_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 1) + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 0) + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))]) + +(define_expand "sincos3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_sincos_extendxf3_i387 (op0, op1, operands[2])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[1], op1)); + DONE; +}) + +(define_insn "fptanxf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operand:XF 3 "const_double_operand" "F")) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_operand:XF 2 
"register_operand" "0")] + UNSPEC_TAN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fptan_extendxf4_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operand:MODEF 3 "const_double_operand" "F")) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_TAN))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "tanxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx one = gen_reg_rtx (XFmode); + rtx op2 = CONST1_RTX (XFmode); /* fld1 */ + + emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "tan2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx one = gen_reg_rtx (mode); + rtx op2 = CONST1_RTX (mode); /* fld1 */ + + emit_insn (gen_fptan_extendxf4_i387 (one, op0, + operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "*fpatanxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0") + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fpatan_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0")) + (float_extend:XF + (match_operand:MODEF 2 "register_operand" "u"))] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "atan2xf3" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "") + (match_operand:XF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations") + +(define_expand "atan23" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + emit_insn (gen_fpatan_extendxf3_i387 (op0, operands[2], operands[1])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "atanxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 2) + (match_operand:XF 1 
"register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "atan2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (mode); + emit_move_insn (op2, CONST1_RTX (mode)); /* fld1 */ + + emit_insn (gen_fpatan_extendxf3_i387 (op0, op2, operands[1])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "asinxf2" + [(set (match_dup 2) + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 1))) + (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) + (set (match_dup 5) (sqrt:XF (match_dup 4))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 5) (match_dup 1)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 6 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 2; i < 6; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "asin2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + if (optimize_insn_for_size_p ()) + FAIL; + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_asinxf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "acosxf2" + [(set (match_dup 2) + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 1))) + (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) + (set (match_dup 5) (sqrt:XF (match_dup 4))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 1) (match_dup 5)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 6 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 2; i < 6; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "acos2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + if (optimize_insn_for_size_p ()) + FAIL; + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_acosxf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fyl2xxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0") + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + 
(set_attr "mode" "XF")]) + +(define_insn "fyl2x_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0")) + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "logxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */ +}) + +(define_expand "log2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */ + + emit_insn (gen_fyl2x_extendxf3_i387 (op0, operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "log10xf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */ +}) + +(define_expand "log102" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */ + + emit_insn (gen_fyl2x_extendxf3_i387 (op0, operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "log2xf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "log22" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ + + emit_insn (gen_fyl2x_extendxf3_i387 (op0, operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fyl2xp1xf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0") + (match_operand:XF 2 
"register_operand" "u")] + UNSPEC_FYL2XP1)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fyl2xp1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fyl2xp1_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0")) + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FYL2XP1)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fyl2xp1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "log1pxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + ix86_emit_i387_log1p (operands[0], operands[1]); + DONE; +}) + +(define_expand "log1p2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + + operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]); + + ix86_emit_i387_log1p (op0, operands[1]); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fxtractxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_XTRACT_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fxtract" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fxtract_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_XTRACT_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fxtract" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "logbxf2" + [(parallel [(set (match_dup 2) + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_XTRACT_FRACT)) + (set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "operands[2] = gen_reg_rtx (XFmode);") + +(define_expand "logb2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extendxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op1)); + DONE; +}) + +(define_expand "ilogbxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx 
op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); + DONE; +}) + +(define_expand "ilogb2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extendxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); + DONE; +}) + +(define_insn "*f2xm1xf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_F2XM1))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "f2xm1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "*fscalexf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FSCALE_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FSCALE_EXP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fscale" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "expNcorexf3" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" ""))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 9) + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 3; i < 10; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "expxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_expxf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "exp10xf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + 
"TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp102" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_exp10xf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "exp2xf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp22" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_exp2xf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "expm1xf2" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 2))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 9) (float_extend:XF (match_dup 13))) + (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (parallel [(set (match_dup 7) + (unspec:XF [(match_dup 6) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 8) + (unspec:XF [(match_dup 6) (match_dup 4)] + UNSPEC_FSCALE_EXP))]) + (parallel [(set (match_dup 10) + (unspec:XF [(match_dup 9) (match_dup 8)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 11) + (unspec:XF [(match_dup 9) (match_dup 8)] + UNSPEC_FSCALE_EXP))]) + (set (match_dup 12) (minus:XF (match_dup 10) + (float_extend:XF (match_dup 13)))) + (set (match_operand:XF 0 "register_operand" "") + (plus:XF (match_dup 12) (match_dup 7)))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 2; i < 13; i++) + operands[i] = gen_reg_rtx (XFmode); + + operands[13] + = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */ + + emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */ +}) + +(define_expand "expm12" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + 
emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_expm1xf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "ldexpxf3" + [(set (match_dup 3) + (float:XF (match_operand:SI 2 "register_operand" ""))) + (parallel [(set (match_operand:XF 0 " register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 3)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 4) + (unspec:XF [(match_dup 1) (match_dup 3)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); +}) + +(define_expand "ldexp3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:SI 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_ldexpxf3 (op0, op1, operands[2])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "scalbxf3" + [(parallel [(set (match_operand:XF 0 " register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 3) + (unspec:XF [(match_dup 1) (match_dup 2)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + operands[3] = gen_reg_rtx (XFmode); +}) + +(define_expand "scalb3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1, op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_scalbxf3 (op0, op1, op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "significandxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_XTRACT_FRACT)) + (set (match_dup 2) + (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "operands[2] = gen_reg_rtx (XFmode);") + +(define_expand "significand2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extendxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + + +(define_insn "sse4_1_round2" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:SI 2 
"const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_ROUND" + "%vrounds\t{%2, %1, %d0|%d0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +(define_insn "rintxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "frndint" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "rint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math) + { + if (!TARGET_ROUND && optimize_insn_for_size_p ()) + FAIL; + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x04))); + else + ix86_expand_rint (operands[0], operands[1]); + } + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_rintxf2 (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) + +(define_expand "round2" + [(match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "nonimmediate_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math && !flag_rounding_math" +{ + if (optimize_insn_for_size_p ()) + FAIL; + if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_round (operands[0], operands[1]); + else + ix86_expand_rounddf_32 (operands[0], operands[1]); + DONE; +}) + +(define_insn_and_split "*fistdi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fistdi2 (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); + emit_insn (gen_fistdi2_with_temp (operands[0], operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_scratch:XF 2 "=&1f"))] + "TARGET_USE_FANCY_MATH_387" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387" + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))]) + +(define_split + [(set 
(match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))])]) + +(define_insn_and_split "*fist2_1" + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_with_temp (operands[0], operands[1], + operands[2])); + DONE; +} + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) + +(define_insn "fist2" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) + +(define_insn "fist2_with_temp" + [(set (match_operand:X87MODEI12 0 "register_operand" "=r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))] + "TARGET_USE_FANCY_MATH_387" + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] + "reload_completed" + [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST)) + (set (match_dup 0) (match_dup 2))]) + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] + "reload_completed" + [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))]) + +(define_expand "lrintxf2" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387") + +(define_expand "lrint2" + [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "") + (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")] + UNSPEC_FIX_NOTRUNC))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode != DImode) || TARGET_64BIT)") + +(define_expand "lround2" + [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode != DImode) || TARGET_64BIT) + && !flag_trapping_math && !flag_rounding_math" +{ + if (optimize_insn_for_size_p ()) + FAIL; + ix86_expand_lround (operands[0], operands[1]); + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_floor" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_FLOOR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_FLOOR] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + + emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "floor") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_floor_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "floor") + (set_attr "mode" "XF")]) + +(define_expand "floorxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_frndintxf2_floor (operands[0], operands[1])); + DONE; +}) + +(define_expand "floor2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || optimize_insn_for_speed_p ())) + { + if (!TARGET_ROUND && optimize_insn_for_size_p ()) + FAIL; + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x01))); + else if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_floorceil (operands[0], operands[1], true); + else + ix86_expand_floorceildf_32 (operands[0], operands[1], true); + } + else + { + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_floor (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) + +(define_insn_and_split "*fist2_floor_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_FLOOR] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fist2_floor (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_floor_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" 
"floor") + (set_attr "mode" "")]) + +(define_insn "fistdi2_floor" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_floor_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])]) + +(define_insn "fist2_floor" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) + +(define_insn "fist2_floor_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:X87MODEI12 
[(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))]) + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))])]) + +(define_expand "lfloorxf2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations") + +(define_expand "lfloor2" + [(match_operand:SWI48 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math" +{ + if (TARGET_64BIT && optimize_insn_for_size_p ()) + FAIL; + ix86_expand_lfloorceil (operands[0], operands[1], true); + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. +(define_insn_and_split "frndintxf2_ceil" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_CEIL)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_CEIL] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL); + + emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_ceil_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "XF")]) + +(define_expand "ceilxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_frndintxf2_ceil (operands[0], operands[1])); + DONE; +}) + +(define_expand "ceil2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || optimize_insn_for_speed_p ())) + { + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x02))); + else if (optimize_insn_for_size_p ()) + FAIL; + else if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_floorceil 
(operands[0], operands[1], false); + else + ix86_expand_floorceildf_32 (operands[0], operands[1], false); + } + else + { + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_ceil (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) + +(define_insn_and_split "*fist2_ceil_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_CEIL] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fist2_ceil (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_ceil_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) + +(define_insn "fistdi2_ceil" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_ceil_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])]) + +(define_insn "fist2_ceil" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + 
(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) + +(define_insn "fist2_ceil_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))]) + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3))])]) + +(define_expand "lceilxf2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations") + +(define_expand "lceil2" + [(match_operand:SWI48 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math" +{ + ix86_expand_lfloorceil (operands[0], operands[1], false); + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
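+;;
+;; Note on the SSE4.1 path used by the rint/floor/ceil/btrunc expanders:
+;; when TARGET_ROUND holds, the whole control-word dance is replaced by a
+;; single ROUNDS[SD] instruction whose immediate selects the rounding mode.
+;; The GEN_INT values passed to the sse4_1 round pattern in this file are:
+;;
+;;   0x01  round toward -Inf   (floor)
+;;   0x02  round toward +Inf   (ceil)
+;;   0x03  round toward zero   (btrunc)
+;;   0x04  use MXCSR rounding  (rint)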
+(define_insn_and_split "frndintxf2_trunc" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_TRUNC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + + emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_trunc_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_TRUNC)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_expand "btruncxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_frndintxf2_trunc (operands[0], operands[1])); + DONE; +}) + +(define_expand "btrunc2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || optimize_insn_for_speed_p ())) + { + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x03))); + else if (optimize_insn_for_size_p ()) + FAIL; + else if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_trunc (operands[0], operands[1]); + else + ix86_expand_truncdf_32 (operands[0], operands[1]); + } + else + { + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_trunc (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
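+;;
+;; The mask_pm variant below changes no rounding bits; the control word
+;; loaded from %3 only sets the PM bit to mask the precision (inexact)
+;; exception, and fclex then discards the sticky status flags.  This is
+;; presumably what lets nearbyint round without raising "inexact":
+;;
+;;	fldcw	%3		; PM set: precision exception masked
+;;	frndint
+;;	fclex			; clear any exception flags frndint set
+;;	fldcw	%2		; restore the caller's control word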
+(define_insn_and_split "frndintxf2_mask_pm" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_MASK_PM)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_MASK_PM] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM); + + emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_mask_pm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_MASK_PM)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_expand "nearbyintxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1])); + DONE; +}) + +(define_expand "nearbyint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_mask_pm (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fxam2_i387" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(match_operand:X87MODEF 1 "register_operand" "f")] + UNSPEC_FXAM))] + "TARGET_USE_FANCY_MATH_387" + "fxam\n\tfnstsw\t%0" + [(set_attr "type" "multi") + (set_attr "length" "4") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_insn_and_split "fxam2_i387_with_temp" + [(set (match_operand:HI 0 "register_operand" "") + (unspec:HI + [(match_operand:MODEF 1 "memory_operand" "")] + UNSPEC_FXAM_MEM))] + "TARGET_USE_FANCY_MATH_387 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 2)(match_dup 1)) + (set (match_dup 0) + (unspec:HI [(match_dup 2)] UNSPEC_FXAM))] +{ + operands[2] = gen_reg_rtx (mode); + + MEM_VOLATILE_P (operands[1]) = 1; +} + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_expand "isinfxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && TARGET_C99_FUNCTIONS" +{ + rtx mask = GEN_INT (0x45); + rtx val = GEN_INT (0x05); + + rtx cond; + + rtx scratch = gen_reg_rtx (HImode); + rtx res = gen_reg_rtx (QImode); + + emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); + + emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_cmpqi_ext_3 (scratch, val)); + cond = gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, res, cond)); + emit_insn (gen_zero_extendqisi2 (operands[0], res)); 
+ DONE; +}) + +(define_expand "isinf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:MODEF 1 "nonimmediate_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && TARGET_C99_FUNCTIONS + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + rtx mask = GEN_INT (0x45); + rtx val = GEN_INT (0x05); + + rtx cond; + + rtx scratch = gen_reg_rtx (HImode); + rtx res = gen_reg_rtx (QImode); + + /* Remove excess precision by forcing value through memory. */ + if (memory_operand (operands[1], VOIDmode)) + emit_insn (gen_fxam2_i387_with_temp (scratch, operands[1])); + else + { + enum ix86_stack_slot slot = (virtuals_instantiated + ? SLOT_TEMP + : SLOT_VIRTUAL); + rtx temp = assign_386_stack_local (mode, slot); + + emit_move_insn (temp, operands[1]); + emit_insn (gen_fxam2_i387_with_temp (scratch, temp)); + } + + emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_cmpqi_ext_3 (scratch, val)); + cond = gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, res, cond)); + emit_insn (gen_zero_extendqisi2 (operands[0], res)); + DONE; +}) + +(define_expand "signbitxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); + DONE; +}) + +(define_insn "movmsk_df" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:DF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH" + "%vmovmskpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "DF")]) + +;; Use movmskpd in SSE mode to avoid store forwarding stall +;; for 32bit targets and movq+shrq sequence for 64bit targets. 
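+;;
+;; Roughly, for signbit (double x) with x in %xmm0 the SSE path below
+;; boils down to two integer instructions and never spills x:
+;;
+;;	movmskpd %xmm0, %eax	; bit 0 = sign of the low double
+;;	andl	$1, %eax
+;;
+;; whereas extracting the sign through memory (32-bit) or via a
+;; movq+shrq $63 sequence (64-bit) is what the comment above avoids.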
+(define_expand "signbitdf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" +{ + if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH) + { + emit_insn (gen_movmsk_df (operands[0], operands[1])); + emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); + } + else + { + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxamdf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); + } + DONE; +}) + +(define_expand "signbitsf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)" +{ + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxamsf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); + DONE; +}) + +;; Block operation instructions + +(define_insn "cld" + [(unspec_volatile [(const_int 0)] UNSPECV_CLD)] + "" + "cld" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +(define_expand "movmem" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SWI48 2 "nonmemory_operand" "")) + (use (match_operand:SWI48 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] + "" +{ + if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +;; Most CPUs don't like single string operations +;; Handle this case here to simplify previous expander. + +(define_expand "strmov" + [(set (match_dup 4) (match_operand 3 "memory_operand" "")) + (set (match_operand 1 "memory_operand" "") (match_dup 4)) + (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5)) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1]))); + + /* If .md ever supports :P for Pmode, these can be directly + in the pattern above. */ + operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust); + operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); + + /* Can't use this if the user has appropriated esi or edi. 
*/ + if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) + { + emit_insn (gen_strmov_singleop (operands[0], operands[1], + operands[2], operands[3], + operands[5], operands[6])); + DONE; + } + + operands[4] = gen_reg_rtx (GET_MODE (operands[1])); +}) + +(define_expand "strmov_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 5 "" ""))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*strmovdi_rex_1" + [(set (mem:DI (match_operand:DI 2 "register_operand" "0")) + (mem:DI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 8))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 8)))] + "TARGET_64BIT + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "movsq" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "DI")]) + +(define_insn "*strmovsi_1" + [(set (mem:SI (match_operand:P 2 "register_operand" "0")) + (mem:SI (match_operand:P 3 "register_operand" "1"))) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 2) + (const_int 4))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_dup 3) + (const_int 4)))] + "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "movs{l|d}" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "*strmovhi_1" + [(set (mem:HI (match_operand:P 2 "register_operand" "0")) + (mem:HI (match_operand:P 3 "register_operand" "1"))) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 2) + (const_int 2))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_dup 3) + (const_int 2)))] + "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "movsw" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "HI")]) + +(define_insn "*strmovqi_1" + [(set (mem:QI (match_operand:P 2 "register_operand" "0")) + (mem:QI (match_operand:P 3 "register_operand" "1"))) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 2) + (const_int 1))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_dup 3) + (const_int 1)))] + "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "movsb" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + +(define_expand "rep_mov" + [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 5 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 6 "" "")) + (set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (use (match_dup 4))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*rep_movdi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (ashift:DI (match_dup 5) (const_int 3)) + (match_operand:DI 4 "register_operand" "1"))) + (set 
(mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "TARGET_64BIT + && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "rep{%;} movsq" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "DI")]) + +(define_insn "*rep_movsi" + [(set (match_operand:P 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (ashift:P (match_operand:P 5 "register_operand" "2") + (const_int 2)) + (match_operand:P 3 "register_operand" "0"))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (ashift:P (match_dup 5) (const_int 2)) + (match_operand:P 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "rep{%;} movs{l|d}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "*rep_movqi" + [(set (match_operand:P 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_operand:P 3 "register_operand" "0") + (match_operand:P 5 "register_operand" "2"))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "rep{%;} movsb" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "QI")]) + +(define_expand "setmem" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:SWI48 1 "nonmemory_operand" "")) + (use (match_operand 2 "const_int_operand" "")) + (use (match_operand 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] + "" +{ + if (ix86_expand_setmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +;; Most CPUs don't like single string operations +;; Handle this case here to simplify previous expander. + +(define_expand "strset" + [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (parallel [(set (match_operand 0 "register_operand" "") + (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (GET_MODE (operands[1]) != GET_MODE (operands[2])) + operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0); + + /* If .md ever supports :P for Pmode, this can be directly + in the pattern above. */ + operands[3] = gen_rtx_PLUS (Pmode, operands[0], + GEN_INT (GET_MODE_SIZE (GET_MODE + (operands[2])))); + /* Can't use this if the user has appropriated eax or edi. 
*/ + if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) + && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])) + { + emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } +}) + +(define_expand "strset_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 3 "" ""))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*strsetdi_rex_1" + [(set (mem:DI (match_operand:DI 1 "register_operand" "0")) + (match_operand:DI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 8)))] + "TARGET_64BIT + && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])" + "stosq" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "*strsetsi_1" + [(set (mem:SI (match_operand:P 1 "register_operand" "0")) + (match_operand:SI 2 "register_operand" "a")) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 1) + (const_int 4)))] + "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])" + "stos{l|d}" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*strsethi_1" + [(set (mem:HI (match_operand:P 1 "register_operand" "0")) + (match_operand:HI 2 "register_operand" "a")) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 1) + (const_int 2)))] + "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])" + "stosw" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "*strsetqi_1" + [(set (mem:QI (match_operand:P 1 "register_operand" "0")) + (match_operand:QI 2 "register_operand" "a")) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 1) + (const_int 1)))] + "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])" + "stosb" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + +(define_expand "rep_stos" + [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "memory_operand" "") (const_int 0)) + (use (match_operand 3 "register_operand" "")) + (use (match_dup 1))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*rep_stosdi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:DI 2 "register_operand" "a")) + (use (match_dup 4))] + "TARGET_64BIT + && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" + "rep{%;} stosq" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "*rep_stossi" + [(set (match_operand:P 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (ashift:P (match_operand:P 4 "register_operand" "1") + (const_int 2)) + (match_operand:P 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:SI 2 "register_operand" "a")) + (use (match_dup 4))] + "!(fixed_regs[AX_REG] 
|| fixed_regs[CX_REG] || fixed_regs[DI_REG])" + "rep{%;} stos{l|d}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*rep_stosqi" + [(set (match_operand:P 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_operand:P 3 "register_operand" "0") + (match_operand:P 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:QI 2 "register_operand" "a")) + (use (match_dup 4))] + "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" + "rep{%;} stosb" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + +(define_expand "cmpstrnsi" + [(set (match_operand:SI 0 "register_operand" "") + (compare:SI (match_operand:BLK 1 "general_operand" "") + (match_operand:BLK 2 "general_operand" ""))) + (use (match_operand 3 "general_operand" "")) + (use (match_operand 4 "immediate_operand" ""))] + "" +{ + rtx addr1, addr2, out, outlow, count, countreg, align; + + if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) + FAIL; + + /* Can't use this if the user has appropriated ecx, esi or edi. */ + if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) + FAIL; + + out = operands[0]; + if (!REG_P (out)) + out = gen_reg_rtx (SImode); + + addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0)); + if (addr1 != XEXP (operands[1], 0)) + operands[1] = replace_equiv_address_nv (operands[1], addr1); + if (addr2 != XEXP (operands[2], 0)) + operands[2] = replace_equiv_address_nv (operands[2], addr2); + + count = operands[3]; + countreg = ix86_zero_extend_to_Pmode (count); + + /* %%% Iff we are testing strict equality, we can use known alignment + to good advantage. This may be possible with combine, particularly + once cc0 is dead. */ + align = operands[4]; + + if (CONST_INT_P (count)) + { + if (INTVAL (count) == 0) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); + } + else + { + rtx (*gen_cmp) (rtx, rtx); + + gen_cmp = (TARGET_64BIT + ? gen_cmpdi_1 : gen_cmpsi_1); + + emit_insn (gen_cmp (countreg, countreg)); + emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); + } + + outlow = gen_lowpart (QImode, out); + emit_insn (gen_cmpintqi (outlow)); + emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow)); + + if (operands[0] != out) + emit_move_insn (operands[0], out); + + DONE; +}) + +;; Produce a tri-state integer (-1, 0, 1) from condition codes. + +(define_expand "cmpintqi" + [(set (match_dup 1) + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_dup 2) + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (parallel [(set (match_operand:QI 0 "register_operand" "") + (minus:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + operands[1] = gen_reg_rtx (QImode); + operands[2] = gen_reg_rtx (QImode); +}) + +;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is +;; zero. Emit extra code to make sure that a zero-length compare is EQ. 
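+;;
+;; Sketch of the fix: with a run-time count, cmpstrnsi above first
+;; compares the count register against itself, which forces the
+;; "equal" flag state before the string insn runs:
+;;
+;;	cmpl	%ecx, %ecx	; sets ZF, i.e. compare-equal
+;;	repz	cmpsb		; leaves flags untouched when %ecx = 0
+;;
+;; so a zero-length compare still reads as EQ afterwards.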
+ +(define_expand "cmpstrnqi_nz_1" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" ""))) + (use (match_operand 2 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*cmpstrnqi_nz_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0")) + (mem:BLK (match_operand:P 5 "register_operand" "1")))) + (use (match_operand:P 6 "register_operand" "2")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_operand:P 0 "register_operand" "=S")) + (clobber (match_operand:P 1 "register_operand" "=D")) + (clobber (match_operand:P 2 "register_operand" "=c"))] + "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "repz{%;} cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) + (set_attr "prefix_rep" "1")]) + +;; The same, but the count is not known to not be zero. + +(define_expand "cmpstrnqi_1" + [(parallel [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand 2 "register_operand" "") + (const_int 0)) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" "")) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*cmpstrnqi_1" + [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0")) + (mem:BLK (match_operand:P 5 "register_operand" "1"))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand:P 0 "register_operand" "=S")) + (clobber (match_operand:P 1 "register_operand" "=D")) + (clobber (match_operand:P 2 "register_operand" "=c"))] + "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" + "repz{%;} cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) + (set_attr "prefix_rep" "1")]) + +(define_expand "strlen" + [(set (match_operand:P 0 "register_operand" "") + (unspec:P [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "immediate_operand" "") + (match_operand 3 "immediate_operand" "")] + UNSPEC_SCAS))] + "" +{ + if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "strlenqi_1" + [(parallel [(set (match_operand 0 "register_operand" "") + (match_operand 2 "" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*strlenqi_1" + [(set (match_operand:P 0 "register_operand" "=&c") + (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:P 3 "immediate_operand" "i") + (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS)) + (clobber 
(match_operand:P 1 "register_operand" "=D")) + (clobber (reg:CC FLAGS_REG))] + "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" + "repnz{%;} scasb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) + (set_attr "prefix_rep" "1")]) + +;; Peephole optimizations to clean up after cmpstrn*. This should be +;; handled in combine, but it is not currently up to the task. +;; When used for their truth value, the cmpstrn* expanders generate +;; code like this: +;; +;; repz cmpsb +;; seta %al +;; setb %dl +;; cmpb %al, %dl +;; jcc label +;; +;; The intermediate three instructions are unnecessary. + +;; This one handles cmpstrn*_nz_1... +(define_peephole2 + [(parallel[ + (set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" "")))) + (use (match_operand 6 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5)))) + (use (match_dup 6)) + (use (match_dup 3)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])]) + +;; ...and this one handles cmpstrn*_1. +(define_peephole2 + [(parallel[ + (set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand 6 "register_operand" "") + (const_int 0)) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" ""))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_dup 6) + (const_int 0)) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5))) + (const_int 0))) + (use (match_dup 3)) + (use (reg:CC FLAGS_REG)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])]) + +;; Conditional move instructions. + +(define_expand "movcc" + [(set (match_operand:SWIM 0 "register_operand" "") + (if_then_else:SWIM (match_operand 1 "ordered_comparison_operator" "") + (match_operand:SWIM 2 "general_operand" "") + (match_operand:SWIM 3 "general_operand" "")))] + "" + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") + +;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing +;; the register first winds up with `sbbl $0,reg', which is also weird. +;; So just document what we're doing explicitly. 
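+;;
+;; The idiom being documented, as emitted (sketch): after a comparison
+;; that sets the carry flag exactly when the condition holds,
+;;
+;;	sbbl	%eax, %eax	; %eax - %eax - CF  =  0 or -1
+;;
+;; materializes an all-zeros/all-ones mask in a single instruction,
+;; which is why the patterns below spell out the 0/-1 if_then_else.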
+ +(define_expand "x86_movcc_0_m1" + [(parallel + [(set (match_operand:SWI48 0 "register_operand" "") + (if_then_else:SWI48 + (match_operator:SWI48 2 "ix86_carry_flag_operator" + [(match_operand 1 "flags_reg_operand" "") + (const_int 0)]) + (const_int -1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_insn "*x86_movcc_0_m1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int -1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{}\t%0, %0" + ; Since we don't have the proper number of operands for an alu insn, + ; fill in all the blanks. + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "") + (set_attr "length_immediate" "0")]) + +(define_insn "*x86_movcc_0_m1_se" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "") + (set_attr "length_immediate" "0")]) + +(define_insn "*x86_movcc_0_m1_neg" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (neg:SWI48 (match_operator 1 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "") + (set_attr "length_immediate" "0")]) + +(define_insn "*movcc_noc" + [(set (match_operand:SWI248 0 "register_operand" "=r,r") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 2 "nonimmediate_operand" "rm,0") + (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))] + "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "")]) + +(define_insn_and_split "*movqicc_noc" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI (match_operator 1 "ix86_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:QI 2 "register_operand" "r,0") + (match_operand:QI 3 "register_operand" "0,r")))] + "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 2) + (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + +(define_expand "movcc" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (if_then_else:X87MODEF + (match_operand 1 "ix86_fp_comparison_operator" "") + (match_operand:X87MODEF 2 "register_operand" "") + (match_operand:X87MODEF 3 "register_operand" "")))] + "(TARGET_80387 && TARGET_CMOVE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") + +(define_insn "*movxfcc_1" + [(set (match_operand:XF 0 "register_operand" 
"=f,f") + (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:XF 2 "register_operand" "f,0") + (match_operand:XF 3 "register_operand" "0,f")))] + "TARGET_80387 && TARGET_CMOVE" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov") + (set_attr "mode" "XF")]) + +(define_insn "*movdfcc_1_rex64" + [(set (match_operand:DF 0 "register_operand" "=f,f,r,r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "DF,DF,DI,DI")]) + +(define_insn "*movdfcc_1" + [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + # + #" + [(set_attr "type" "fcmov,fcmov,multi,multi") + (set_attr "mode" "DF,DF,DI,DI")]) + +(define_split + [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "") + (match_operand:DF 3 "nonimmediate_operand" "")))] + "!TARGET_64BIT && reload_completed" + [(set (match_dup 2) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 5) + (match_dup 6))) + (set (match_dup 3) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 7) + (match_dup 8)))] +{ + split_double_mode (DImode, &operands[2], 2, &operands[5], &operands[7]); + split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]); +}) + +(define_insn "*movsfcc_1_387" + [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "SF,SF,SI,SI")]) + +;; All moves in XOP pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. + +;; XOP conditional move +(define_insn "*xop_pcmov_" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "register_operand" "x")))] + "TARGET_XOP" + "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). 
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. + +(define_insn "*avx_3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "s\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. + +(define_insn "*avx_ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vmins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "mins\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +(define_insn "*avx_ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vmaxs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "maxs\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +;; Make two stack loads independent: +;; fld aa fld aa +;; fld %st(0) -> fld bb +;; fmul bb fmul %st(1), %st +;; +;; Actually we only match the last two instructions for simplicity. +(define_peephole2 + [(set (match_operand 0 "fp_register_operand" "") + (match_operand 1 "fp_register_operand" "")) + (set (match_dup 0) + (match_operator 2 "binary_fp_operator" + [(match_dup 0) + (match_operand 3 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (match_dup 4))] + + ;; The % modifier is not operational anymore in peephole2's, so we have to + ;; swap the operands manually in the case of addition and multiplication. 
+ "if (COMMUTATIVE_ARITH_P (operands[2])) + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), + GET_MODE (operands[2]), + operands[0], operands[1]); + else + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), + GET_MODE (operands[2]), + operands[1], operands[0]);") + +;; Conditional addition patterns +(define_expand "addcc" + [(match_operand:SWI 0 "register_operand" "") + (match_operand 1 "ordered_comparison_operator" "") + (match_operand:SWI 2 "register_operand" "") + (match_operand:SWI 3 "const_int_operand" "")] + "" + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") + +;; Misc patterns (?) + +;; This pattern exists to put a dependency on all ebp-based memory accesses. +;; Otherwise there will be nothing to keep +;; +;; [(set (reg ebp) (reg esp))] +;; [(set (reg esp) (plus (reg esp) (const_int -160000))) +;; (clobber (eflags)] +;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))] +;; +;; in proper program order. + +(define_insn "pro_epilogue_adjust_stack__add" + [(set (match_operand:P 0 "register_operand" "=r,r") + (plus:P (match_operand:P 1 "register_operand" "0,r") + (match_operand:P 2 "" "r,l"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOV: + return "mov{}\t{%1, %0|%0, %1}"; + + case TYPE_ALU: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (x86_maybe_negate_const_int (&operands[2], mode)) + return "sub{}\t{%2, %0|%0, %2}"; + + return "add{}\t{%2, %0|%0, %2}"; + + default: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{}\t{%a2, %0|%0, %a2}"; + } +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "0") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) + (const_string "alu") + (match_operand: 2 "const0_operand" "") + (const_string "imov") + ] + (const_string "lea"))) + (set (attr "length_immediate") + (cond [(eq_attr "type" "imov") + (const_string "0") + (and (eq_attr "type" "alu") + (match_operand 2 "const128_operand" "")) + (const_string "1") + ] + (const_string "*"))) + (set_attr "mode" "")]) + +(define_insn "pro_epilogue_adjust_stack__sub" + [(set (match_operand:P 0 "register_operand" "=r") + (minus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "" + "sub{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "allocate_stack_worker_probe_" + [(set (match_operand:P 0 "register_operand" "=a") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")] + UNSPECV_STACK_PROBE)) + (clobber (reg:CC FLAGS_REG))] + "ix86_target_stack_probe ()" + "call\t___chkstk_ms" + [(set_attr "type" "multi") + (set_attr "length" "5")]) + +(define_expand "allocate_stack" + [(match_operand 0 "register_operand" "") + (match_operand 1 "general_operand" "")] + "ix86_target_stack_probe ()" +{ + rtx x; + +#ifndef CHECK_STACK_LIMIT +#define CHECK_STACK_LIMIT 0 +#endif + + if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < CHECK_STACK_LIMIT) + { + x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1], + stack_pointer_rtx, 0, OPTAB_DIRECT); + if (x != stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, x); + } + else + { + x = copy_to_mode_reg (Pmode, operands[1]); + if (TARGET_64BIT) + emit_insn (gen_allocate_stack_worker_probe_di (x, x)); + else + emit_insn (gen_allocate_stack_worker_probe_si (x, x)); + x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, 
x, + stack_pointer_rtx, 0, OPTAB_DIRECT); + if (x != stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, x); + } + + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; +}) + +;; Use IOR for stack probes, this is shorter. +(define_expand "probe_stack" + [(match_operand 0 "memory_operand" "")] + "" +{ + rtx (*gen_ior3) (rtx, rtx, rtx); + + gen_ior3 = (GET_MODE (operands[0]) == DImode + ? gen_iordi3 : gen_iorsi3); + + emit_insn (gen_ior3 (operands[0], operands[0], const0_rtx)); + DONE; +}) + +(define_insn "adjust_stack_and_probe" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")] + UNSPECV_PROBE_STACK_RANGE)) + (set (reg:P SP_REG) + (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "" + "* return output_adjust_stack_and_probe (operands[0]);" + [(set_attr "type" "multi")]) + +(define_insn "probe_stack_range" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "const_int_operand" "n")] + UNSPECV_PROBE_STACK_RANGE)) + (clobber (reg:CC FLAGS_REG))] + "" + "* return output_probe_stack_range (operands[0], operands[2]);" + [(set_attr "type" "multi")]) + +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "!TARGET_64BIT && flag_pic" +{ +#if TARGET_MACHO + if (TARGET_MACHO) + { + rtx xops[3]; + rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + rtx label_rtx = gen_label_rtx (); + emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx)); + xops[0] = xops[1] = picreg; + xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx)); + ix86_expand_binary_operator (MINUS, SImode, xops); + } + else +#endif + emit_insn (gen_set_got (pic_offset_table_rtx)); + DONE; +}) + +;; Avoid redundant prefixes by splitting HImode arithmetic to SImode. + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "promotable_binary_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "aligned_operand" "")])) + (clobber (reg:CC FLAGS_REG))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && ((GET_MODE (operands[0]) == HImode + && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX) + /* ??? next two lines just !satisfies_constraint_K (...) */ + || !CONST_INT_P (operands[2]) + || satisfies_constraint_K (operands[2]))) + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + if (GET_CODE (operands[3]) != ASHIFT) + operands[2] = gen_lowpart (SImode, operands[2]); + PUT_MODE (operands[3], SImode);") + +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(and (match_operand 3 "aligned_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)])) + (set (match_operand 1 "register_operand" "") + (and (match_dup 3) (match_dup 4)))] + "! 
TARGET_PARTIAL_REG_STALL && reload_completed + && optimize_insn_for_speed_p () + && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX) + || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode)) + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4)) + (const_int 0)])) + (set (match_dup 1) + (and:SI (match_dup 3) (match_dup 4)))])] +{ + operands[4] + = gen_int_mode (INTVAL (operands[4]) + & GET_MODE_MASK (GET_MODE (operands[1])), SImode); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[3] = gen_lowpart (SImode, operands[3]); +}) + +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand:HI 2 "aligned_operand" "") + (match_operand:HI 3 "const_int_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && ! TARGET_FAST_PREFIX + && optimize_insn_for_speed_p () + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)" + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)]))] +{ + operands[3] + = gen_int_mode (INTVAL (operands[3]) + & GET_MODE_MASK (GET_MODE (operands[2])), SImode); + operands[2] = gen_lowpart (SImode, operands[2]); +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (neg (match_operand 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" + [(parallel [(set (match_dup 0) + (neg:SI (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]);") + +(define_split + [(set (match_operand 0 "register_operand" "") + (not (match_operand 1 "register_operand" "")))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" + [(set (match_dup 0) + (not:SI (match_dup 1)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]);") + +(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "ordered_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand 2 "register_operand" "") + (match_operand 3 "register_operand" "")))] + "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);") + +;; RTL Peephole optimizations, run before sched2. 
These primarily look to +;; transform a complex memory operation into two memory to register operations. + +;; Don't push memory operands +(define_peephole2 + [(set (match_operand:SWI 0 "push_operand" "") + (match_operand:SWI 1 "memory_operand" "")) + (match_scratch:SWI 2 "")] + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))]) + +;; We need to handle SFmode only, because DFmode and XFmode are split to +;; SImode pushes. +(define_peephole2 + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (match_scratch:SF 2 "r")] + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))]) + +;; Don't move an immediate directly to memory when the instruction +;; gets too big. +(define_peephole2 + [(match_scratch:SWI124 1 "") + (set (match_operand:SWI124 0 "memory_operand" "") + (const_int 0))] + "optimize_insn_for_speed_p () + && !TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 1))] + "operands[2] = gen_lowpart (SImode, operands[1]);") + +(define_peephole2 + [(match_scratch:SWI124 2 "") + (set (match_operand:SWI124 0 "memory_operand" "") + (match_operand:SWI124 1 "immediate_operand" ""))] + "optimize_insn_for_speed_p () + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))]) + +;; Don't compare memory with zero, load and use a test instead. +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand:SI 2 "memory_operand" "") + (const_int 0)])) + (match_scratch:SI 3 "r")] + "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]) + +;; NOT is not pairable on Pentium, while XOR is, but one byte longer. +;; Don't split NOTs with a displacement operand, because resulting XOR +;; will not be pairable anyway. +;; +;; On AMD K6, NOT is vector decoded with memory operand that cannot be +;; represented using a modRM byte. The XOR replacement is long decoded, +;; so this split helps here as well. +;; +;; Note: Can't do this as a regular split because we can't get proper +;; lifetime information then. + +(define_peephole2 + [(set (match_operand:SWI124 0 "nonimmediate_operand" "") + (not:SWI124 (match_operand:SWI124 1 "nonimmediate_operand" "")))] + "optimize_insn_for_speed_p () + && ((TARGET_NOT_UNPAIRABLE + && (!MEM_P (operands[0]) + || !memory_displacement_operand (operands[0], mode))) + || (TARGET_NOT_VECTORMODE + && long_memory_operand (operands[0], mode))) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) + (xor:SWI124 (match_dup 1) (const_int -1))) + (clobber (reg:CC FLAGS_REG))])]) + +;; Non pairable "test imm, reg" instructions can be translated to +;; "and imm, reg" if reg dies. The "and" form is also shorter (one +;; byte opcode instead of two, have a short form for byte operands), +;; so do it for other CPUs as well. Given that the value was dead, +;; this should not create any new dependencies. 
Pass on the sub-word +;; versions if we're concerned about partial register stalls. + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")) + (const_int 0)]))] + "ix86_match_ccmode (insn, CCNOmode) + && (true_regnum (operands[2]) != AX_REG + || satisfies_constraint_K (operands[3])) + && peep2_reg_dead_p (1, operands[2])" + [(parallel + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:SI (match_dup 2) (match_dup 3)))])]) + +;; We don't need to handle HImode case, because it will be promoted to SImode +;; on ! TARGET_PARTIAL_REG_STALL + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:QI (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "immediate_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[2]) != AX_REG + && peep2_reg_dead_p (1, operands[2])" + [(parallel + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:QI (match_dup 2) (match_dup 3)))])]) + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI + (zero_extract:SI + (match_operand 2 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[2]) != AX_REG + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 + [(and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)) + (const_int 0)])) + (set (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)))])]) + +;; Don't do logical operations with memory inputs. +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))])]) + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "memory_operand" "") + (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2) (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])]) + +;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when the memory address +;; refers to the destination of the load! 
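+;;
+;; An illustrative case for the warning above (register choice is
+;; arbitrary): the first peephole below turns
+;;
+;;	movl	%esi, %eax		movl	(%esi), %eax
+;;	addl	(%eax), %eax	  into	addl	%esi, %eax
+;;
+;; and must rewrite the load address from (%eax) to (%esi), since %eax
+;; no longer holds the copied pointer once the load lands in it; that
+;; is what the replace_rtx call in the preparation statement does.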
+ +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (match_dup 0) + (match_operator:SI 3 "commutative_operator" + [(match_dup 0) + (match_operand:SI 2 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "REGNO (operands[0]) != REGNO (operands[1]) + && GENERAL_REGNO_P (REGNO (operands[0])) + && GENERAL_REGNO_P (REGNO (operands[1]))" + [(set (match_dup 0) (match_dup 4)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = replace_rtx (operands[2], operands[0], operands[1]);") + +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "register_operand" "")) + (set (match_dup 0) + (match_operator 3 "commutative_operator" + [(match_dup 0) + (match_operand 2 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1]) + && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1])) + || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]) + +; Don't do logical operations with memory outputs +; +; These two don't make sense for PPro/PII -- we're expanding a 4-uop +; instruction into two 1-uop insns plus a 2-uop insn. That last has +; the same decoder scheduling characteristics as the original. + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "nonmemory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 2) (match_dup 1)])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 2))]) + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "nonmemory_operand" "") + (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 2))]) + +;; Attempt to always use XOR for zeroing registers. +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "const0_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) + && GENERAL_REG_P (operands[0]) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (const_int 0)) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (word_mode, operands[0]);") + +(define_peephole2 + [(set (strict_low_part (match_operand 0 "register_operand" "")) + (const_int 0))] + "(GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode) + && (! 
TARGET_USE_MOV0 || optimize_insn_for_size_p ()) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) + (clobber (reg:CC FLAGS_REG))])]) + +;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg. +(define_peephole2 + [(set (match_operand:SWI248 0 "register_operand" "") + (const_int -1))] + "(optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (const_int -1)) + (clobber (reg:CC FLAGS_REG))])] +{ + if (GET_MODE_SIZE (<MODE>mode) < GET_MODE_SIZE (SImode)) + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +;; Attempt to convert simple lea to add/shift. +;; These can be created by move expanders. + +(define_peephole2 + [(set (match_operand:SWI48 0 "register_operand" "") + (plus:SWI48 (match_dup 0) + (match_operand:SWI48 1 "<nonmemory_operand>" "")))] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")) 0))] + "TARGET_64BIT + && peep2_regno_dead_p (0, FLAGS_REG) + && REGNO (operands[0]) == REGNO (operands[1])" + [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = gen_lowpart (SImode, operands[2]);") + +(define_peephole2 + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_dup 0) + (match_operand:SWI48 1 "const_int_operand" "")))] + "exact_log2 (INTVAL (operands[1])) >= 0 + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "const_int_operand" "")) 0))] + "TARGET_64BIT + && exact_log2 (INTVAL (operands[2])) >= 0 + && REGNO (operands[0]) == REGNO (operands[1]) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));") + +;; The ESP adjustments can be done by the push and pop instructions. The +;; resulting code is shorter, since push is only 1 byte, while add imm, %esp +;; is 3 bytes. On many CPUs it is also faster, since special hardware to +;; avoid esp dependencies is present. + +;; While some of these conversions may be done using splitters, we use +;; peepholes in order to allow the combine_stack_adjustments pass to see +;; nonobfuscated RTL. + +;; Convert prologue esp subtractions to push. +;; We need a register to push. In order to keep verify_flow_info happy, we +;; have two choices: +;; - use a scratch register and clobber it in order to avoid dependencies +;; - use an already live register +;; We can't use the second way right now, since there is no reliable way to +;; verify that a given register is live. The first choice will also most +;; likely result in fewer dependencies. At the point of the esp adjustments +;; it is very likely that call-clobbered registers are dead. We may want to +;; use the base pointer as an alternative later, when no register is +;; available.
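+;; An illustrative sketch of the prologue conversion on ia32 (the
+;; scratch register is whatever peephole2 provides; %ecx is only an
+;; example):
+;;	subl	$4, %esp	->	pushl	%ecx
+;; The value pushed is garbage, but the slot is about to be initialized
+;; anyway, and the explicit clobber records that %ecx loses its old
+;; value.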
+ +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (clobber (mem:BLK (scratch)))])]) + +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (clobber (mem:BLK (scratch)))])]) + +;; Convert esp subtractions to push. +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) + +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) + +;; Convert epilogue deallocator to pop. +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "(TARGET_SINGLE_POP || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)" + [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (clobber (mem:BLK (scratch)))])]) + +;; The two-pops case is tricky, since pop causes a dependency +;; on the destination register. We use two registers if available. +(define_peephole2 + [(match_scratch:P 1 "r") + (match_scratch:P 2 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (clobber (mem:BLK (scratch)))]) + (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))]) + +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p () + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (clobber (mem:BLK (scratch)))]) + (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) + +;; Convert esp additions to pop.
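+;; Illustratively (dead scratch registers chosen arbitrarily):
+;;	addl	$4, %esp	->	popl	%ecx
+;;	addl	$8, %esp	->	popl	%ecx
+;;					popl	%edx
+;; The popped values are dead; pop is used only for its one-byte stack
+;; adjustment.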
+(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)" + [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) + +;; The two-pops case is tricky, since pop causes a dependency +;; on the destination register. We use two registers if available. +(define_peephole2 + [(match_scratch:P 1 "r") + (match_scratch:P 2 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))]) + +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) + +;; Convert compares with 1 to shorter inc/dec operations when CF is not +;; required and the register dies. Similarly for 128 to -128. +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")]))] + "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ()) + && incdec_operand (operands[3], GET_MODE (operands[3]))) + || (!TARGET_FUSE_CMP_AND_BRANCH + && INTVAL (operands[3]) == 128)) + && ix86_match_ccmode (insn, CCGCmode) + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 [(match_dup 2) (match_dup 3)])) + (clobber (match_dup 2))])]) + +;; Convert imul by three, five and nine into lea. +(define_peephole2 + [(parallel + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 "register_operand" "") + (match_operand:SWI48 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9" + [(set (match_dup 0) + (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2)) + (match_dup 1)))] + "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") + +(define_peephole2 + [(parallel + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "") + (match_operand:SWI48 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2)) + (match_dup 0)))] + "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") + +;; imul $32bit_imm, mem, reg is vector decoded, while +;; imul $32bit_imm, reg, reg is direct decoded.
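+;; An illustrative sketch (the constant is chosen so that it does not
+;; satisfy constraint K, i.e. it is not a sign-extended 8-bit value):
+;;	imull	$123456, 4(%esp), %eax	->	movl	4(%esp), %edx
+;;						imull	$123456, %edx, %eax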
+(define_peephole2 + [(match_scratch:SWI48 3 "r") + (parallel [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 "memory_operand" "") + (match_operand:SWI48 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "immediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT + && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) + (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) + (clobber (reg:CC FLAGS_REG))])]) + +;; imul $8/16bit_imm, regmem, reg is vector decoded. +;; Convert it into imul reg, reg. +;; It would be better to force the assembler to encode the instruction using a +;; long immediate, but there is apparently no way to do so. +(define_peephole2 + [(parallel [(set (match_operand:SWI248 0 "register_operand" "") + (mult:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "") + (match_operand:SWI248 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:SWI248 3 "r")] + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () + && satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +;; After splitting up read-modify operations, array accesses with memory +;; operands might end up in the form: +;; sall $2, %eax +;; movl 4(%esp), %edx +;; addl %edx, %eax +;; instead of the pre-split form: +;; sall $2, %eax +;; addl 4(%esp), %eax +;; Turn it into: +;; movl 4(%esp), %edx +;; leal (%edx,%eax,4), %eax + +(define_peephole2 + [(match_scratch:P 5 "r") + (parallel [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_operand 3 "register_operand" "") + (plus (match_dup 0) + (match_operand 4 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "IN_RANGE (INTVAL (operands[2]), 1, 3) + /* Validate MODE for lea. */ + && ((!TARGET_PARTIAL_REG_STALL + && (GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode)) + || GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)) + && (rtx_equal_p (operands[0], operands[3]) + || peep2_reg_dead_p (2, operands[0])) + /* We reorder the load and the shift. */ + && !reg_overlap_mentioned_p (operands[0], operands[4])" + [(set (match_dup 5) (match_dup 4)) + (set (match_dup 0) (match_dup 1))] +{ + enum machine_mode op1mode = GET_MODE (operands[1]); + enum machine_mode mode = op1mode == DImode ? 
DImode : SImode; + int scale = 1 << INTVAL (operands[2]); + rtx index = gen_lowpart (Pmode, operands[1]); + rtx base = gen_lowpart (Pmode, operands[5]); + rtx dest = gen_lowpart (mode, operands[3]); + + operands[1] = gen_rtx_PLUS (Pmode, base, + gen_rtx_MULT (Pmode, index, GEN_INT (scale))); + operands[5] = base; + if (mode != Pmode) + operands[1] = gen_rtx_SUBREG (mode, operands[1], 0); + if (op1mode != Pmode) + operands[5] = gen_rtx_SUBREG (op1mode, operands[5], 0); + operands[0] = dest; +}) + +;; Call-value patterns last so that the wildcard operand does not +;; disrupt insn-recog's switch tables. + +(define_insn_and_split "*call_value_pop_0_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "")))]) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_pop_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "")))] + "!TARGET_64BIT" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_pop_1_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))]) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_pop_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*sibcall_value_pop_1_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i,i")))]) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_pop_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i,i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + 
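+;; To illustrate the "pop" variants above: operand 3 is the number of
+;; argument bytes the callee itself removes from the stack, as in the
+;; stdcall convention.  A sketch with a hypothetical callee f:
+;;	pushl	%eax
+;;	call	f		(f returns with `ret $4')
+;; so the stack adjustment is represented inside the call insn rather
+;; than as a separate add.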
+(define_insn_and_split "*call_value_0_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_0_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" "")))] + "TARGET_64BIT" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0_rex64_ms_sysv" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + 
[(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*sibcall_value_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm")) + (match_operand:DI 2 "" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1_rex64_ms_sysv" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm")) + (match_operand:DI 2 "" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + 
(clobber (reg:DI DI_REG))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_rex64_large_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1_rex64_large" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "z,U")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "z,U")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && SIBLING_CALL_P (insn)" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. +;; That, however, is usually mapped by the OS to SIGSEGV, which is often +;; caught for use by garbage collectors and the like. Using an insn that +;; maps to SIGILL makes it more likely the program will rightfully die. +;; Keeping with tradition, "6" is in honor of #UD. +(define_insn "trap" + [(trap_if (const_int 1) (const_int 6))] + "" + { return ASM_SHORT "0x0b0f"; } + [(set_attr "length" "2")]) + +(define_expand "prefetch" + [(prefetch (match_operand 0 "address_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE || TARGET_3DNOW" +{ + int rw = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); + + gcc_assert (rw == 0 || rw == 1); + gcc_assert (locality >= 0 && locality <= 3); + gcc_assert (GET_MODE (operands[0]) == Pmode + || GET_MODE (operands[0]) == VOIDmode); + + /* Use the 3dNOW prefetch when we are asking for a write prefetch not + supported by the SSE counterpart, or when the SSE prefetch is not + available (K6 machines). Otherwise use the SSE prefetch, as it allows + the locality to be specified. 
*/ + if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) + operands[2] = GEN_INT (3); + else + operands[1] = const0_rtx; +}) + +(define_insn "*prefetch_sse_<mode>" + [(prefetch (match_operand:P 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE" +{ + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; + + int locality = INTVAL (operands[1]); + gcc_assert (locality >= 0 && locality <= 3); + + return patterns[locality]; +} + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "prefetch") + (set (attr "length_address") + (symbol_ref "memory_address_length (operands[0])")) + (set_attr "memory" "none")]) + +(define_insn "*prefetch_3dnow_<mode>" + [(prefetch (match_operand:P 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx") + (set (attr "length_address") + (symbol_ref "memory_address_length (operands[0])")) + (set_attr "memory" "none")]) + +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")] + "" +{ + rtx (*insn)(rtx, rtx); + +#ifdef TARGET_THREAD_SSP_OFFSET + operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET); + insn = (TARGET_64BIT + ? gen_stack_tls_protect_set_di + : gen_stack_tls_protect_set_si); +#else + insn = (TARGET_64BIT + ? gen_stack_protect_set_di + : gen_stack_protect_set_si); +#endif + + emit_insn (insn (operands[0], operands[1])); + DONE; +}) + +(define_insn "stack_protect_set_<mode>" + [(set (match_operand:P 0 "memory_operand" "=m") + (unspec:P [(match_operand:P 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:P 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_set_<mode>" + [(set (match_operand:P 0 "memory_operand" "=m") + (unspec:P [(match_operand:P 1 "const_int_operand" "i")] + UNSPEC_SP_TLS_SET)) + (set (match_scratch:P 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{<imodesuffix>}\t{%@:%P1, %2|%2, <iptrsize> PTR %@:%P1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" + [(set_attr "type" "multi")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + + rtx (*insn)(rtx, rtx, rtx); + +#ifdef TARGET_THREAD_SSP_OFFSET + operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET); + insn = (TARGET_64BIT + ? gen_stack_tls_protect_test_di + : gen_stack_tls_protect_test_si); +#else + insn = (TARGET_64BIT + ? 
gen_stack_protect_test_di + : gen_stack_protect_test_si); +#endif + + emit_insn (insn (flags, operands[0], operands[1])); + + emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx), + flags, const0_rtx, operands[2])); + DONE; +}) + +(define_insn "stack_protect_test_<mode>" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:P 1 "memory_operand" "m") + (match_operand:P 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:P 3 "=&r"))] + "" + "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_test_<mode>" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:P 1 "memory_operand" "m") + (match_operand:P 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:P 3 "=r"))] + "" + "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%@:%P2, %3|%3, <iptrsize> PTR %@:%P2}" + [(set_attr "type" "multi")]) + +(define_insn "sse4_2_crc32<mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")] + UNSPEC_CRC32))] + "TARGET_SSE4_2 || TARGET_CRC32" + "crc32{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set (attr "prefix_data16") + (if_then_else (match_operand:HI 2 "" "") + (const_string "1") + (const_string "*"))) + (set (attr "prefix_rex") + (if_then_else (match_operand:QI 2 "ext_QIreg_operand" "") + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI")]) + +(define_insn "sse4_2_crc32di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm")] + UNSPEC_CRC32))] + "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)" + "crc32{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "DI")]) + +(define_expand "rdpmc" + [(match_operand:DI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")] + "" +{ + rtx reg = gen_reg_rtx (DImode); + rtx si; + + /* Force operand 1 into ECX. 
*/ + rtx ecx = gen_rtx_REG (SImode, CX_REG); + emit_insn (gen_rtx_SET (VOIDmode, ecx, operands[1])); + si = gen_rtx_UNSPEC_VOLATILE (DImode, gen_rtvec (1, ecx), + UNSPECV_RDPMC); + + if (TARGET_64BIT) + { + rtvec vec = rtvec_alloc (2); + rtx load = gen_rtx_PARALLEL (VOIDmode, vec); + rtx upper = gen_reg_rtx (DImode); + rtx di = gen_rtx_UNSPEC_VOLATILE (DImode, + gen_rtvec (1, const0_rtx), + UNSPECV_RDPMC); + RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, reg, si); + RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, upper, di); + emit_insn (load); + upper = expand_simple_binop (DImode, ASHIFT, upper, GEN_INT (32), + NULL, 1, OPTAB_DIRECT); + reg = expand_simple_binop (DImode, IOR, reg, upper, reg, 1, + OPTAB_DIRECT); + } + else + emit_insn (gen_rtx_SET (VOIDmode, reg, si)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], reg)); + DONE; +}) + +(define_insn "*rdpmc" + [(set (match_operand:DI 0 "register_operand" "=A") + (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")] + UNSPECV_RDPMC))] + "!TARGET_64BIT" + "rdpmc" + [(set_attr "type" "other") + (set_attr "length" "2")]) + +(define_insn "*rdpmc_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")] + UNSPECV_RDPMC)) + (set (match_operand:DI 1 "register_operand" "=d") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPMC))] + "TARGET_64BIT" + "rdpmc" + [(set_attr "type" "other") + (set_attr "length" "2")]) + +(define_expand "rdtsc" + [(set (match_operand:DI 0 "register_operand" "") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))] + "" +{ + if (TARGET_64BIT) + { + rtvec vec = rtvec_alloc (2); + rtx load = gen_rtx_PARALLEL (VOIDmode, vec); + rtx upper = gen_reg_rtx (DImode); + rtx lower = gen_reg_rtx (DImode); + rtx src = gen_rtx_UNSPEC_VOLATILE (DImode, + gen_rtvec (1, const0_rtx), + UNSPECV_RDTSC); + RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, lower, src); + RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, upper, src); + emit_insn (load); + upper = expand_simple_binop (DImode, ASHIFT, upper, GEN_INT (32), + NULL, 1, OPTAB_DIRECT); + lower = expand_simple_binop (DImode, IOR, lower, upper, lower, 1, + OPTAB_DIRECT); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], lower)); + DONE; + } +}) + +(define_insn "*rdtsc" + [(set (match_operand:DI 0 "register_operand" "=A") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))] + "!TARGET_64BIT" + "rdtsc" + [(set_attr "type" "other") + (set_attr "length" "2")]) + +(define_insn "*rdtsc_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC)) + (set (match_operand:DI 1 "register_operand" "=d") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))] + "TARGET_64BIT" + "rdtsc" + [(set_attr "type" "other") + (set_attr "length" "2")]) + +(define_expand "rdtscp" + [(match_operand:DI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")] + "" +{ + rtx di = gen_rtx_UNSPEC_VOLATILE (DImode, + gen_rtvec (1, const0_rtx), + UNSPECV_RDTSCP); + rtx si = gen_rtx_UNSPEC_VOLATILE (SImode, + gen_rtvec (1, const0_rtx), + UNSPECV_RDTSCP); + rtx reg = gen_reg_rtx (DImode); + rtx tmp = gen_reg_rtx (SImode); + + if (TARGET_64BIT) + { + rtvec vec = rtvec_alloc (3); + rtx load = gen_rtx_PARALLEL (VOIDmode, vec); + rtx upper = gen_reg_rtx (DImode); + RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, reg, di); + RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, upper, di); + RTVEC_ELT (vec, 2) = gen_rtx_SET (VOIDmode, tmp, si); + emit_insn (load); + upper = expand_simple_binop (DImode, ASHIFT, 
upper, GEN_INT (32), + NULL, 1, OPTAB_DIRECT); + reg = expand_simple_binop (DImode, IOR, reg, upper, reg, 1, + OPTAB_DIRECT); + } + else + { + rtvec vec = rtvec_alloc (2); + rtx load = gen_rtx_PARALLEL (VOIDmode, vec); + RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, reg, di); + RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, tmp, si); + emit_insn (load); + } + emit_insn (gen_rtx_SET (VOIDmode, operands[0], reg)); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], tmp)); + DONE; +}) + +(define_insn "*rdtscp" + [(set (match_operand:DI 0 "register_operand" "=A") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP)) + (set (match_operand:SI 1 "register_operand" "=c") + (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))] + "!TARGET_64BIT" + "rdtscp" + [(set_attr "type" "other") + (set_attr "length" "3")]) + +(define_insn "*rdtscp_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP)) + (set (match_operand:DI 1 "register_operand" "=d") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP)) + (set (match_operand:SI 2 "register_operand" "=c") + (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))] + "TARGET_64BIT" + "rdtscp" + [(set_attr "type" "other") + (set_attr "length" "3")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; LWP instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "lwp_llwpcb" + [(unspec_volatile [(match_operand 0 "register_operand" "r")] + UNSPECV_LLWP_INTRINSIC)] + "TARGET_LWP") + +(define_insn "*lwp_llwpcb<mode>1" + [(unspec_volatile [(match_operand:P 0 "register_operand" "r")] + UNSPECV_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "<MODE>") + (set_attr "length" "5")]) + +(define_expand "lwp_slwpcb" + [(set (match_operand 0 "register_operand" "=r") + (unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))] + "TARGET_LWP" +{ + if (TARGET_64BIT) + emit_insn (gen_lwp_slwpcbdi (operands[0])); + else + emit_insn (gen_lwp_slwpcbsi (operands[0])); + DONE; +}) + +(define_insn "lwp_slwpcb<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "<MODE>") + (set_attr "length" "5")]) + +(define_expand "lwp_lwpval<mode>3" + [(unspec_volatile [(match_operand:SWI48 1 "register_operand" "r") + (match_operand:SI 2 "nonimmediate_operand" "rm") + (match_operand:SI 3 "const_int_operand" "i")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + ;; Avoid unused variable warning. 
+ "(void) operands[0];") + +(define_insn "*lwp_lwpval3_1" + [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "") + (set (attr "length") + (symbol_ref "ix86_attr_length_address_default (insn) + 9"))]) + +(define_expand "lwp_lwpins3" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SWI48 1 "register_operand" "r") + (match_operand:SI 2 "nonimmediate_operand" "rm") + (match_operand:SI 3 "const_int_operand" "i")] + UNSPECV_LWPINS_INTRINSIC)) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))] + "TARGET_LWP") + +(define_insn "*lwp_lwpins3_1" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPECV_LWPINS_INTRINSIC))] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "") + (set (attr "length") + (symbol_ref "ix86_attr_length_address_default (insn) + 9"))]) + +(define_insn "rdfsbase" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDFSBASE))] + "TARGET_64BIT && TARGET_FSGSBASE" + "rdfsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + +(define_insn "rdgsbase" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDGSBASE))] + "TARGET_64BIT && TARGET_FSGSBASE" + "rdgsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + +(define_insn "wrfsbase" + [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")] + UNSPECV_WRFSBASE)] + "TARGET_64BIT && TARGET_FSGSBASE" + "wrfsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + +(define_insn "wrgsbase" + [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")] + UNSPECV_WRGSBASE)] + "TARGET_64BIT && TARGET_FSGSBASE" + "wrgsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + +(define_insn "rdrand_1" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND)) + (set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))] + "TARGET_RDRND" + "rdrand\t%0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "1")]) + +(include "mmx.md") +(include "sse.md") +(include "sync.md") -- cgit v1.2.3